Example #1
    def test_lock_and_till(self):
        locker = Lock("prime lock")
        got_lock = Signal()
        a_is_ready = Signal("a lock")
        b_is_ready = Signal("b lock")

        def loop(is_ready, please_stop):
            with locker:
                while not got_lock:
                    # Log.note("{{thread}} is waiting", thread=Thread.current().name)
                    locker.wait(till=Till(seconds=0))
                    is_ready.go()
                locker.wait()
                Log.note("thread is expected to get here")
        thread_a = Thread.run("a", loop, a_is_ready)
        thread_b = Thread.run("b", loop, b_is_ready)

        a_is_ready.wait()
        b_is_ready.wait()
        with locker:
            got_lock.go()
            Till(seconds=0.1).wait()  # MUST WAIT FOR a AND b TO PERFORM locker.wait()
            Log.note("leaving")
            pass
        with locker:
            Log.note("leaving again")
            pass
        Till(seconds=1).wait()

        self.assertTrue(bool(thread_a.stopped), "Thread should be done by now")
        self.assertTrue(bool(thread_b.stopped), "Thread should be done by now")
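Example #1 turns on the interplay of `Lock.wait` and `Till`: a thread inside a `with locker:` block may call `locker.wait(...)`, which releases the lock while blocked and reacquires it before returning, so another thread can enter the same lock in the meantime. A minimal sketch of that pattern, assuming only that mo_threads exports Lock, Thread, and Till as used above:

from mo_threads import Lock, Thread, Till

queue = []
locker = Lock("demo lock")

def consumer(please_stop):
    with locker:
        while not please_stop:
            if queue:
                print("got", queue.pop(0))
            # wait() RELEASES locker WHILE BLOCKED, REACQUIRES ON WAKE
            locker.wait(till=Till(seconds=0.1) | please_stop)

worker = Thread.run("consumer", consumer)
with locker:  # SAFE: THE CONSUMER IS PARKED INSIDE locker.wait()
    queue.append("work item")
Till(seconds=0.5).wait()
worker.stop()
worker.join()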
Example #2
    def test_memory_cleanup_with_till(self):
        objgraph.growth()

        root = Signal()
        for i in range(100000):
            if i % 1000 == 0:
                Log.note("at {{num}} tills", num=i)
            root = root | Till(seconds=100000)
            mid_mem = psutil.Process(os.getpid()).memory_info().rss
            if mid_mem > 1000 * 1000 * 1000:
                Log.note("{{num}} Till triggers created", num=i)
                break
        trigger = Signal()
        root = root | trigger

        growth = objgraph.growth(limit=4)
        growth and Log.note("More object\n{{growth}}", growth=growth)

        trigger.go()
        root.wait()  # THERE SHOULD BE NO DELAY HERE

        for _ in range(0, 20):
            try:
                Till(seconds=0.1).wait()  # LET TIMER DAEMON CLEANUP
                current = [(t, objgraph.count(t), objgraph.count(t) - c)
                           for t, c, d in growth]
                Log.note("Object count\n{{current}}", current=current)

                # NUMBER OF OBJECTS CLEANED UP SHOULD MATCH NUMBER OF OBJECTS CREATED
                for (_, _, cd), (_, _, gd) in zip(current, growth):
                    self.assertAlmostEqual(-cd, gd, places=2)
                return
            except Exception:
                pass
        Log.error("object counts did not go down")
Example #3
    def request(self, method, path, headers):
        now = Date.now()
        self.inbound_rate.add(now)
        ready = Signal(path)

        # TEST CACHE
        with self.cache_locker:
            pair = self.cache.get(path)
            if pair is None:
                self.cache[path] = (ready, None, None, now)

        if pair is not None:
            # REQUEST IS IN THE QUEUE ALREADY, WAIT
            ready, headers, response, then = pair
            if response is None:
                ready.wait()
                with self.cache_locker:
                    ready, headers, response, timestamp = self.cache.get(path)
            with self.db.transaction() as t:
                t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))
            return Response(
                response,
                status=200,
                headers=json.loads(headers)
            )

        # TEST DB
        db_response = self.db.query("SELECT headers, response FROM cache WHERE path=" + quote_value(path)).data
        if db_response:
            headers, response = db_response[0]
            with self.db.transaction() as t:
                t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))
            with self.cache_locker:
                self.cache[path] = (ready, headers, response.encode('latin1'), now)
            ready.go()

            return Response(
                response,
                status=200,
                headers=json.loads(headers)
            )

        # MAKE A NETWORK REQUEST
        self.todo.add((ready, method, path, headers, now))
        ready.wait()
        with self.cache_locker:
            ready, headers, response, timestamp = self.cache[path]
        return Response(
            response,
            status=200,
            headers=json.loads(headers)
        )
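The `request` method above coalesces duplicate requests: the first caller for a path installs a `(ready, None, None, now)` placeholder and does the work, while concurrent callers find the placeholder and block on its `ready` Signal until the response lands in the cache. A stripped-down sketch of that single-flight pattern, assuming only mo_threads' Lock and Signal; `do_work` is a hypothetical callable standing in for the database and network logic above:

from mo_threads import Lock, Signal

cache_locker = Lock("cache lock")
cache = {}  # path -> (ready, response)

def fetch(path, do_work):
    # do_work IS A HYPOTHETICAL CALLABLE THAT PERFORMS THE SLOW REQUEST
    with cache_locker:
        entry = cache.get(path)
        if entry is None:
            ready = Signal(path)
            cache[path] = (ready, None)

    if entry is not None:
        # ANOTHER CALLER IS (OR WAS) ALREADY WORKING ON path
        ready, _ = entry
        ready.wait()
        with cache_locker:
            _, response = cache[path]
        return response

    response = do_work(path)  # ONLY THE FIRST CALLER GETS HERE
    with cache_locker:
        cache[path] = (ready, response)
    ready.go()  # WAKE ALL WAITERS
    return response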
Example #5
    def test_till_in_loop(self):

        def loop(please_stop):
            counter = 0
            while not please_stop:
                (Till(seconds=0.001) | please_stop).wait()
                counter += 1
                Log.note("{{count}}", count=counter)

        please_stop=Signal("please_stop")
        Thread.run("loop", loop, please_stop=please_stop)
        Till(seconds=1).wait()
        with please_stop.lock:
            self.assertLessEqual(len(please_stop.job_queue), 1, "Expecting only one pending job on go")
        please_stop.go()
Example #6
    def wait_for_shutdown_signal(
        please_stop=False,  # ASSIGN SIGNAL TO STOP EARLY
        allow_exit=False,  # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
        wait_forever=True  # IGNORE CHILD THREADS, NEVER EXIT.  False -> IF NO CHILD THREADS LEFT, THEN EXIT
    ):
        """
        FOR USE BY PROCESSES NOT EXPECTED TO EVER COMPLETE UNTIL EXTERNAL
        SHUTDOWN IS REQUESTED

        SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"

        :param please_stop:
        :param allow_exit:
        :param wait_forever: Assume all needed threads have been launched; when False, exit once all child threads are done
        :return:
        """
        if not isinstance(please_stop, Signal):
            please_stop = Signal()

        please_stop.on_go(lambda: start_new_thread(_stop_main_thread, ()))

        self_thread = Thread.current()
        if self_thread != MAIN_THREAD:
            Log.error(
                "Only the main thread can sleep forever (waiting for KeyboardInterrupt)"
            )

        if not wait_forever:
            # TRIGGER SIGNAL WHEN ALL EXITING THREADS ARE DONE
            pending = copy(self_thread.children)
            all_done = AndSignals(please_stop, len(pending))
            for p in pending:
                p.stopped.on_go(all_done.done)

        try:
            if allow_exit:
                _wait_for_exit(please_stop)
            else:
                _wait_for_interrupt(please_stop)
        except (KeyboardInterrupt, SystemExit):
            Log.alert("SIGINT Detected!  Stopping...")
        finally:
            please_stop.go()
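A typical entry point built on this helper might look like the sketch below. The worker body is illustrative; only `Thread.run`, `Till`, and `wait_for_shutdown_signal` come from the code above, and the import assumes mo_threads exports them:

from mo_threads import Thread, Till, wait_for_shutdown_signal

def worker(please_stop):
    while not please_stop:
        # DO PERIODIC WORK, THEN SLEEP WITHOUT BLOCKING SHUTDOWN
        (Till(seconds=1) | please_stop).wait()

Thread.run("worker", worker)
# BLOCK THE MAIN THREAD UNTIL Ctrl-C, OR "exit" IS TYPED ON THE CONSOLE
wait_for_shutdown_signal(allow_exit=True)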
Example #7
    def test_lock_and_till(self):
        locker = Lock("prime lock")
        got_signal = Signal()
        a_is_ready = Signal("a lock")
        b_is_ready = Signal("b lock")

        Log.note("begin")

        def loop(is_ready, please_stop):
            with locker:
                while not got_signal:
                    locker.wait(till=Till(seconds=0.01))
                    is_ready.go()
                    Log.note("{{thread}} is ready",
                             thread=Thread.current().name)
                Log.note("outside loop")
                locker.wait()
                Log.note("thread is expected to get here")

        thread_a = Thread.run("a", loop, a_is_ready).release()
        thread_b = Thread.run("b", loop, b_is_ready).release()

        a_is_ready.wait()
        b_is_ready.wait()
        timeout = Till(seconds=1)
        with locker:
            got_signal.go()
            while not thread_a.stopped:
                # WE MUST CONTINUE TO USE THE locker TO ENSURE THE OTHER THREADS ARE NOT ORPHANED IN THERE
                locker.wait(till=Till(seconds=0.1))
                Log.note("wait for a thread")
            while not thread_b.stopped:
                # WE MUST CONTINUE TO USE THE locker TO ENSURE THE OTHER THREADS ARE NOT ORPHANED IN THERE
                locker.wait(till=Till(seconds=0.1))
                Log.note("wait for b thread")
        thread_a.join()
        thread_b.join()
        if timeout:
            Log.error("Took too long")

        self.assertTrue(bool(thread_a.stopped), "Thread should be done by now")
        self.assertTrue(bool(thread_b.stopped), "Thread should be done by now")
Example #8
    def test_job_queue_in_signal(self):

        gc.collect()
        start_mem = psutil.Process(os.getpid()).memory_info().rss
        Log.note("Start memory {{mem|comma}}", mem=start_mem)

        main = Signal()
        result = [main | Signal() for _ in range(10000)]

        mid_mem = psutil.Process(os.getpid()).memory_info().rss
        Log.note("Mid memory {{mem|comma}}", mem=mid_mem)

        del result
        gc.collect()

        end_mem = psutil.Process(os.getpid()).memory_info().rss
        Log.note("End memory {{mem|comma}}", mem=end_mem)

        main.go()  # NOT NEEDED, BUT INTERESTING

        self.assertLess(end_mem, (start_mem + mid_mem) / 2,
                        "end memory should be closer to start")
Example #9
class Clogger:

    # Singleton of the look-ahead scanner Clogger
    SINGLE_CLOGGER = None

    def __new__(cls, *args, **kwargs):
        if cls.SINGLE_CLOGGER is None:
            cls.SINGLE_CLOGGER = object.__new__(cls)
        return cls.SINGLE_CLOGGER


    def __init__(self, conn=None, tuid_service=None, start_workers=True, new_table=False, kwargs=None):
        try:
            self.config = kwargs
            self.conn = conn if conn else sql.Sql(self.config.database.name)
            self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache, use_cache=True) if self.config.hg_cache else Null

            self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
                kwargs=self.config.tuid, conn=self.conn, clogger=self
            )
            self.rev_locker = Lock()
            self.working_locker = Lock()

            if new_table:
                with self.conn.transaction() as t:
                    t.execute("DROP TABLE IF EXISTS csetLog")

            self.init_db()
            self.next_revnum = coalesce(self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
            self.csets_todo_backwards = Queue(name="Clogger.csets_todo_backwards")
            self.deletions_todo = Queue(name="Clogger.deletions_todo")
            self.maintenance_signal = Signal(name="Clogger.maintenance_signal")

            if 'tuid' in self.config:
                self.config = self.config.tuid

            self.disable_backfilling = False
            self.disable_tipfilling = False
            self.disable_deletion = False
            self.disable_maintenance = False

            self.backfill_thread = None
            self.tipfill_thread = None
            self.deletion_thread = None
            self.maintenance_thread = None

            # Make sure we are filled before allowing queries
            numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
            if numrevs < MINIMUM_PERMANENT_CSETS:
                Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
                oldest_rev = 'tip'
                with self.conn.transaction() as t:
                    tmp = t.query("SELECT min(revnum), revision FROM csetLog").data[0][1]
                    if tmp:
                        oldest_rev = tmp
                self._fill_in_range(
                    MINIMUM_PERMANENT_CSETS - numrevs,
                    oldest_rev,
                    timestamp=False
                )

            Log.note(
                "Table is filled with atleast {{minim}} entries.",
                minim=MINIMUM_PERMANENT_CSETS
            )

            if start_workers:
                self.start_workers()
        except Exception as e:
            Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))


    def start_backfilling(self):
        if not self.backfill_thread:
            self.backfill_thread = Thread.run('clogger-backfill', self.fill_backward_with_list)


    def start_tipfilling(self):
        if not self.tipfill_thread:
            self.tipfill_thread = Thread.run('clogger-tip', self.fill_forward_continuous)


    def start_maintenance(self):
        if not self.maintenance_thread:
            self.maintenance_thread = Thread.run('clogger-maintenance', self.csetLog_maintenance)


    def start_deleter(self):
        if not self.deletion_thread:
            self.deletion_thread = Thread.run('clogger-deleter', self.csetLog_deleter)


    def start_workers(self):
        self.start_tipfilling()
        self.start_backfilling()
        self.start_maintenance()
        self.start_deleter()
        Log.note("Started clogger workers.")


    def init_db(self):
        with self.conn.transaction() as t:
            t.execute('''
            CREATE TABLE IF NOT EXISTS csetLog (
                revnum         INTEGER PRIMARY KEY,
                revision       CHAR(12) NOT NULL,
                timestamp      INTEGER
            );''')


    def disable_all(self):
        self.disable_tipfilling = True
        self.disable_backfilling = True
        self.disable_maintenance = True
        self.disable_deletion = True


    def revnum(self):
        """
        :return: max revnum that was added
        """
        return coalesce(self.conn.get_one("SELECT max(revnum) as revnum FROM csetLog")[0], 0)


    def get_tip(self, transaction):
        return transaction.get_one(
            "SELECT max(revnum) as revnum, revision FROM csetLog"
        )


    def get_tail(self, transaction):
        return transaction.get_one(
            "SELECT min(revnum) as revnum, revision FROM csetLog"
        )


    def _get_clog(self, clog_url):
        try:
            Log.note("Searching through changelog {{url}}", url=clog_url)
            clog_obj = http.get_json(clog_url, retry=RETRY)
            return clog_obj
        except Exception as e:
            Log.error(
                "Unexpected error getting changset-log for {{url}}: {{error}}",
                url=clog_url,
                error=e
            )


    def _get_one_revision(self, transaction, cset_entry):
        # Returns a single revision if it exists
        _, rev, _ = cset_entry
        return transaction.get_one("SELECT revision FROM csetLog WHERE revision=?", (rev,))


    def _get_one_revnum(self, transaction, rev):
        # Returns a single revnum if it exists
        return transaction.get_one("SELECT revnum FROM csetLog WHERE revision=?", (rev,))


    def _get_revnum_range(self, transaction, revnum1, revnum2):
        # Returns a range of revision numbers (that is inclusive)
        high_num = max(revnum1, revnum2)
        low_num = min(revnum1, revnum2)

        return transaction.query(
            "SELECT revnum, revision FROM csetLog WHERE "
            "revnum >= " + str(low_num) + " AND revnum <= " + str(high_num)
        ).data


    def recompute_table_revnums(self):
        '''
        Recomputes the revnums for the csetLog table
        by creating a new table, and copying csetLog to
        it. The INTEGER PRIMARY KEY in the temp table auto increments
        as rows are added.

        IMPORTANT: Only call this after acquiring the
                   lock `self.working_locker`.
        :return:
        '''
        with self.conn.transaction() as t:
            t.execute('''
            CREATE TABLE temp (
                revnum         INTEGER PRIMARY KEY,
                revision       CHAR(12) NOT NULL,
                timestamp      INTEGER
            );''')

            t.execute(
                "INSERT INTO temp (revision, timestamp) "
                "SELECT revision, timestamp FROM csetlog ORDER BY revnum ASC"
            )

            t.execute("DROP TABLE csetLog;")
            t.execute("ALTER TABLE temp RENAME TO csetLog;")


    def check_for_maintenance(self):
        '''
        Returns True if the maintenance worker should be run now,
        and False otherwise.
        :return:
        '''
        numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
        Log.note("Number of csets in csetLog table: {{num}}", num=numrevs)
        return numrevs >= SIGNAL_MAINTENANCE_CSETS


    def add_cset_entries(self, ordered_rev_list, timestamp=False, number_forward=True):
        '''
        Adds a list of revisions to the table. Assumes ordered_rev_list is ordered
        according to how changesets are found in the changelog. Going forwards or
        backwards is dealt with by flipping the list.
        :param ordered_rev_list: Order given from changeset log searching.
        :param timestamp: If False, records are kept indefinitely,
                          but if holes exist: (delete, None, delete, None)
                          those deletes with Nones around them
                          will not be deleted.
        :param number_forward: If True, this function will number the revision list
                               by going forward from max(revnum), else it'll go backwards
                               from revnum, then add X to all revnums and self.next_revnum
                               where X is the length of ordered_rev_list
        :return:
        '''
        with self.conn.transaction() as t:
            current_min = t.get_one("SELECT min(revnum) FROM csetlog")[0]
            current_max = t.get_one("SELECT max(revnum) FROM csetlog")[0]
            if not current_min or not current_max:
                current_min = 0
                current_max = 0

            direction = -1
            start = current_min - 1
            if number_forward:
                direction = 1
                start = current_max + 1
                ordered_rev_list = ordered_rev_list[::-1]

            insert_list = [
                (
                    start + direction * count,
                    rev,
                    int(time.time()) if timestamp else -1
                )
                for count, rev in enumerate(ordered_rev_list)
            ]

            # In case of overlapping requests
            fmt_insert_list = []
            for cset_entry in insert_list:
                tmp = self._get_one_revision(t, cset_entry)
                if not tmp:
                    fmt_insert_list.append(cset_entry)

            for _, tmp_insert_list in jx.groupby(fmt_insert_list, size=SQL_CSET_BATCH_SIZE):
                t.execute(
                    "INSERT INTO csetLog (revnum, revision, timestamp)" +
                    " VALUES " +
                    sql_list(
                        quote_set((revnum, revision, timestamp))
                        for revnum, revision, timestamp in tmp_insert_list
                    )
                )

            # Move the revision numbers forward if needed
            self.recompute_table_revnums()

        # Start a maintenance run if needed
        if self.check_for_maintenance():
            Log.note("Scheduling maintenance run on clogger.")
            self.maintenance_signal.go()


    def _fill_in_range(self, parent_cset, child_cset, timestamp=False, number_forward=True):
        '''
        Fills cset logs in a certain range. 'parent_cset' can be an int; in that case
        we get that many changesets instead, we consider that we are going backwards
        (number_forward is False), we ignore the first changeset of the first log,
        and we ignore the given number_forward setting. Otherwise, we continue until
        we find the given 'parent_cset'.
        :param parent_cset:
        :param child_cset:
        :param timestamp:
        :param number_forward:
        :return:
        '''
        csets_to_add = []
        found_parent = False
        find_parent = False
        if not isinstance(parent_cset, int):
            find_parent = True
        elif parent_cset >= MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG:
            Log.warning(
                "Requested number of new changesets {{num}} is too high. "
                "Max number that can be requested is {{maxnum}}.",
                num=parent_cset,
                maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG
            )
            return None

        csets_found = 0
        clogs_seen = 0
        final_rev = child_cset
        while not found_parent and clogs_seen < MAX_BACKFILL_CLOGS:
            clog_url = str(HG_URL) + "/" + self.config.hg.branch + "/json-log/" + final_rev
            clog_obj = self._get_clog(clog_url)
            clog_csets_list = list(clog_obj['changesets'])
            for clog_cset in clog_csets_list[:-1]:
                if not number_forward and csets_found <= 0:
                    # Skip this entry, it already exists
                    csets_found += 1
                    continue

                nodes_cset = clog_cset['node'][:12]
                if find_parent:
                    if nodes_cset == parent_cset:
                        found_parent = True
                        if not number_forward:
                            # When going backwards this entry is
                            # the given parent
                            csets_to_add.append(nodes_cset)
                        break
                else:
                    if csets_found + 1 > parent_cset:
                        found_parent = True
                        if not number_forward:
                            # When going backwards this entry is
                            # the given parent (which is supposed
                            # to already exist)
                            csets_to_add.append(nodes_cset)
                        break
                    csets_found += 1
                csets_to_add.append(nodes_cset)
            if found_parent:
                break

            clogs_seen += 1
            final_rev = clog_csets_list[-1]['node'][:12]

        if found_parent:
            self.add_cset_entries(csets_to_add, timestamp=timestamp, number_forward=number_forward)
        else:
            Log.warning(
                "Couldn't find the end of the request for {{request}}. "
                "Max number that can be requested through _fill_in_range is {{maxnum}}.",
                request={
                    'parent_cset': parent_cset,
                    'child_cset': child_cset,
                    'number_forward': number_forward
                },
                maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG
            )
            return None
        return csets_to_add


    def initialize_to_range(self, old_rev, new_rev, delete_old=True):
        '''
        Used in service testing to get to very old
        changesets quickly.
        :param old_rev: The oldest revision to keep
        :param new_rev: The revision to start searching from
        :param delete_old: If True, clear the existing csetLog first
        :return:
        '''
        old_settings = [
            self.disable_tipfilling,
            self.disable_backfilling,
            self.disable_maintenance,
            self.disable_deletion
        ]
        self.disable_tipfilling = True
        self.disable_backfilling = True
        self.disable_maintenance = True
        self.disable_deletion = True

        old_rev = old_rev[:12]
        new_rev = new_rev[:12]

        with self.working_locker:
            if delete_old:
                with self.conn.transaction() as t:
                    t.execute("DELETE FROM csetLog")
            with self.conn.transaction() as t:
                t.execute(
                    "INSERT INTO csetLog (revision, timestamp) VALUES " +
                    quote_set((new_rev, -1))
                )
            self._fill_in_range(old_rev, new_rev, timestamp=True, number_forward=False)

        self.disable_tipfilling = old_settings[0]
        self.disable_backfilling = old_settings[1]
        self.disable_maintenance = old_settings[2]
        self.disable_deletion = old_settings[3]


    def fill_backward_with_list(self, please_stop=None):
        '''
        Expects requests of the tuple form: (parent_cset, timestamp)
        parent_cset can be an int X to go back by X changesets, or
        a string to search for going backwards in time. If timestamp
        is false, no timestamps will be added to the entries.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                request = self.csets_todo_backwards.pop(till=please_stop)
                if please_stop:
                    break

                # If backfilling is disabled, all requests
                # are ignored.
                if self.disable_backfilling:
                    Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()
                    continue

                if request:
                    parent_cset, timestamp = request
                else:
                    continue

                with self.working_locker:
                    with self.conn.transaction() as t:
                        parent_revnum = self._get_one_revnum(t, parent_cset)
                    if parent_revnum:
                        continue

                    with self.conn.transaction() as t:
                        _, oldest_revision = self.get_tail(t)

                    self._fill_in_range(
                        parent_cset,
                        oldest_revision,
                        timestamp=timestamp,
                        number_forward=False
                    )
                Log.note("Finished {{cset}}", cset=parent_cset)
            except Exception as e:
                Log.warning("Unknown error occurred during backfill: ", cause=e)


    def update_tip(self):
        '''
        Returns False if the tip is already at the newest, or True
        if an update has taken place.
        :return:
        '''
        clog_obj = self._get_clog(
            str(HG_URL) + "/" + self.config.hg.branch + "/json-log/tip"
        )

        # Get current tip in DB
        with self.conn.transaction() as t:
            _, newest_known_rev = self.get_tip(t)

        # If we are still at the newest, wait for CSET_TIP_WAIT_TIME seconds
        # before checking again.
        first_clog_entry = clog_obj['changesets'][0]['node'][:12]
        if newest_known_rev == first_clog_entry:
            return False

        csets_to_gather = None
        if not newest_known_rev:
            Log.note(
                "No revisions found in table, adding {{minim}} entries...",
                minim=MINIMUM_PERMANENT_CSETS
            )
            csets_to_gather = MINIMUM_PERMANENT_CSETS

        found_newest_known = False
        csets_to_add = []
        csets_found = 0
        clogs_seen = 0
        Log.note("Found new revisions. Updating csetLog tip to {{rev}}...", rev=first_clog_entry)
        while not found_newest_known and clogs_seen < MAX_TIPFILL_CLOGS:
            clog_csets_list = list(clog_obj['changesets'])
            for clog_cset in clog_csets_list[:-1]:
                nodes_cset = clog_cset['node'][:12]
                if not csets_to_gather:
                    if nodes_cset == newest_known_rev:
                        found_newest_known = True
                        break
                else:
                    if csets_found >= csets_to_gather:
                        found_newest_known = True
                        break
                csets_found += 1
                csets_to_add.append(nodes_cset)
            if not found_newest_known:
                # Get the next page
                clogs_seen += 1
                final_rev = clog_csets_list[-1]['node'][:12]
                clog_url = str(HG_URL) + "/" + self.config.hg.branch + "/json-log/" + final_rev
                clog_obj = self._get_clog(clog_url)

        if clogs_seen >= MAX_TIPFILL_CLOGS:
            Log.error(
                "Too many changesets, can't find last tip or the number is too high: {{rev}}. "
                "Maximum possible to request is {{maxnum}}",
                rev=coalesce(newest_known_rev, csets_to_gather),
                maxnum=MAX_TIPFILL_CLOGS * CHANGESETS_PER_CLOG
            )
            return False

        with self.working_locker:
            Log.note("Adding {{csets}}", csets=csets_to_add)
            self.add_cset_entries(csets_to_add, timestamp=False)
        return True


    def fill_forward_continuous(self, please_stop=None):
        while not please_stop:
            try:
                while not please_stop and not self.disable_tipfilling and self.update_tip():
                    pass
                (please_stop | Till(seconds=CSET_TIP_WAIT_TIME)).wait()
            except Exception as e:
                Log.warning("Unknown error occurred during tip filling:", cause=e)


    def csetLog_maintenance(self, please_stop=None):
        '''
        Handles deleting old csetLog entries, and timestamping
        revisions once they pass the permanent-storage length so
        they can be deleted later.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                # Wait until something signals the maintenance cycle
                # to begin (or end).
                (self.maintenance_signal | please_stop).wait()

                if please_stop:
                    break
                if self.disable_maintenance:
                    continue

                Log.warning(
                    "Starting clog maintenance. Since this doesn't start often, "
                    "we need to explicitly see when it's started with this warning."
                )

                # Reset signal so we don't request
                # maintenance infinitely.
                with self.maintenance_signal.lock:
                    self.maintenance_signal._go = False

                with self.working_locker:
                    all_data = None
                    with self.conn.transaction() as t:
                        all_data = sorted(
                            t.get("SELECT revnum, revision, timestamp FROM csetLog"),
                            key=lambda x: int(x[0])
                        )

                    # Restore maximum permanents (if overflowing)
                    new_data = []
                    modified = False
                    for count, (revnum, revision, timestamp) in enumerate(all_data[::-1]):
                        if count < MINIMUM_PERMANENT_CSETS:
                            if timestamp != -1:
                                modified = True
                                new_data.append((revnum, revision, -1))
                            else:
                                new_data.append((revnum, revision, timestamp))
                        elif not isinstance(timestamp, int) or timestamp == -1:
                            modified = True
                            new_data.append((revnum, revision, int(time.time())))
                        else:
                            new_data.append((revnum, revision, timestamp))

                    # Delete annotations at revisions with timestamps
                    # that are too old. The csetLog entries will have
                    # their timestamps reset here.
                    new_data1 = []
                    annrevs_to_del = []
                    current_time = time.time()
                    for count, (revnum, revision, timestamp) in enumerate(new_data[::-1]):
                        new_timestamp = timestamp
                        if timestamp != -1:
                            if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
                                modified = True
                                new_timestamp = current_time
                                annrevs_to_del.append(revision)
                        new_data1.append((revnum, revision, new_timestamp))

                    if len(annrevs_to_del) > 0:
                        # Delete any latestFileMod and annotation entries
                        # that are too old.
                        Log.note(
                            "Deleting annotations and latestFileMod for revisions for being "
                            "older than {{oldest}}: {{revisions}}",
                            oldest=TIME_TO_KEEP_ANNOTATIONS,
                            revisions=annrevs_to_del
                        )
                        with self.conn.transaction() as t:
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN " +
                                quote_set(annrevs_to_del)
                            )
                            t.execute(
                                "DELETE FROM annotations WHERE revision IN " +
                                quote_set(annrevs_to_del)
                            )

                    # Delete any overflowing entries
                    new_data2 = new_data1
                    reved_all_data = all_data[::-1]
                    deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
                    delete_overflowing_revstart = None
                    if len(deleted_data) > 0:
                        _, delete_overflowing_revstart, _ = deleted_data[0]
                        new_data2 = set(all_data) - set(deleted_data)

                        # Update old frontiers if requested, otherwise
                        # they will all get deleted by the csetLog_deleter
                        # worker
                        if UPDATE_VERY_OLD_FRONTIERS:
                            _, max_revision, _ = all_data[-1]
                            for _, revision, _ in deleted_data:
                                with self.conn.transaction() as t:
                                    old_files = t.get(
                                        "SELECT file FROM latestFileMod WHERE revision=?",
                                        (revision,)
                                    )
                                if old_files is None or len(old_files) <= 0:
                                    continue

                                self.tuid_service.get_tuids_from_files(
                                    old_files,
                                    max_revision,
                                    going_forward=True,
                                )

                                still_exist = True
                                while still_exist and not please_stop:
                                    Till(seconds=TUID_EXISTENCE_WAIT_TIME).wait()
                                    with self.conn.transaction() as t:
                                        old_files = t.get(
                                            "SELECT file FROM latestFileMod WHERE revision=?",
                                            (revision,)
                                        )
                                    if old_files is None or len(old_files) <= 0:
                                        still_exist = False

                    # Update table and schedule a deletion
                    if modified:
                        with self.conn.transaction() as t:
                            insert_into_db_chunked(
                                t,
                                new_data2,
                                "INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES "
                            )
                    if not deleted_data:
                        continue

                    Log.note("Scheduling {{num_csets}} for deletion", num_csets=len(deleted_data))
                    self.deletions_todo.add(delete_overflowing_revstart)
            except Exception as e:
                Log.warning("Unexpected error occured while maintaining csetLog, continuing to try: ", cause=e)
        return


    def csetLog_deleter(self, please_stop=None):
        '''
        Deletes changesets from the csetLog table
        and also changesets from the annotation table
        that have revisions matching the given changesets.
        Accepts lists of csets from self.deletions_todo.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                request = self.deletions_todo.pop(till=please_stop)
                if please_stop:
                    break

                # If deletion is disabled, ignore the current
                # request - it will need to be re-requested.
                if self.disable_deletion:
                    Till(seconds=CSET_DELETION_WAIT_TIME).wait()
                    continue

                with self.working_locker:
                    first_cset = request

                    # Since we are deleting and moving stuff around in the
                    # TUID tables, we need everything to be contained in
                    # one transaction with no interruptions.
                    with self.conn.transaction() as t:
                        revnum = self._get_one_revnum(t, first_cset)[0]
                        csets_to_del = t.get(
                            "SELECT revnum, revision FROM csetLog WHERE revnum <= ?", (revnum,)
                        )
                        csets_to_del = [cset for _, cset in csets_to_del]
                        existing_frontiers = t.query(
                            "SELECT revision FROM latestFileMod WHERE revision IN " +
                            quote_set(csets_to_del)
                        ).data

                        existing_frontiers = [r[0] for r in existing_frontiers]
                        Log.note(
                            "Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
                            csets=csets_to_del
                        )

                        if len(existing_frontiers) > 0:
                            # This handles files which no longer exist anymore in
                            # the main branch.
                            Log.note(
                                "Deleting existing frontiers for revisions: {{revisions}}",
                                revisions=existing_frontiers
                            )
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN " +
                                quote_set(existing_frontiers)
                            )

                        Log.note("Deleting annotations...")
                        t.execute(
                            "DELETE FROM annotations WHERE revision IN " +
                            quote_set(csets_to_del)
                        )

                        Log.note(
                            "Deleting {{num_entries}} csetLog entries...",
                            num_entries=len(csets_to_del)
                        )
                        t.execute(
                            "DELETE FROM csetLog WHERE revision IN " +
                            quote_set(csets_to_del)
                        )

                    # Recalculate the revnums
                    self.recompute_table_revnums()
            except Exception as e:
                Log.warning("Unexpected error occured while deleting from csetLog:", cause=e)
                Till(seconds=CSET_DELETION_WAIT_TIME).wait()
        return


    def get_old_cset_revnum(self, revision):
        self.csets_todo_backwards.add((revision, True))

        revnum = None
        timeout = Till(seconds=BACKFILL_REVNUM_TIMEOUT)
        while not timeout:
            with self.conn.transaction() as t:
                revnum = self._get_one_revnum(t, revision)

            if revnum and revnum[0] >= 0:
                break
            elif revnum and revnum[0] < 0:
                Log.note("Waiting for table to recompute...")
            else:
                Log.note("Waiting for backfill to complete...")
            Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()
            Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()

        if timeout:
            Log.error(
                "Cannot find revision {{rev}} after waiting {{timeout}} seconds",
                rev=revision,
                timeout=BACKFILL_REVNUM_TIMEOUT
            )
        return revnum


    def get_revnums_from_range(self, revision1, revision2):
        with self.conn.transaction() as t:
            revnum1 = self._get_one_revnum(t, revision1)
            revnum2 = self._get_one_revnum(t, revision2)
        if not revnum1 or not revnum2:
            did_an_update = self.update_tip()
            if did_an_update:
                with self.conn.transaction() as t:
                    revnum1 = self._get_one_revnum(t, revision1)
                    revnum2 = self._get_one_revnum(t, revision2)

            if not revnum1:
                revnum1 = self.get_old_cset_revnum(revision1)
                # Refresh the second entry
                with self.conn.transaction() as t:
                    revnum2 = self._get_one_revnum(t, revision2)

            if not revnum2:
                revnum2 = self.get_old_cset_revnum(revision2)

                # The first revnum might change also
                with self.conn.transaction() as t:
                    revnum1 = self._get_one_revnum(t, revision1)

        with self.conn.transaction() as t:
            result = self._get_revnum_range(t, revnum1[0], revnum2[0])
        return sorted(
            result,
            key=lambda x: int(x[0])
        )
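`recompute_table_revnums` leans on a SQLite behavior: a row inserted without a value for its INTEGER PRIMARY KEY column gets the next integer automatically, so copying rows in ORDER BY revnum order renumbers them 1, 2, 3, ... A self-contained sketch of that renumbering trick, using only the standard sqlite3 module:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE csetLog (revnum INTEGER PRIMARY KEY, revision CHAR(12) NOT NULL)")
# GAPS IN revnum, AS LEFT BEHIND BY DELETIONS
conn.executemany("INSERT INTO csetLog VALUES (?, ?)", [(5, "aaa"), (9, "bbb"), (12, "ccc")])

# COPY IN ORDER; THE OMITTED INTEGER PRIMARY KEY RENUMBERS FROM 1
conn.execute("CREATE TABLE temp (revnum INTEGER PRIMARY KEY, revision CHAR(12) NOT NULL)")
conn.execute("INSERT INTO temp (revision) SELECT revision FROM csetLog ORDER BY revnum ASC")
conn.execute("DROP TABLE csetLog")
conn.execute("ALTER TABLE temp RENAME TO csetLog")

print(conn.execute("SELECT revnum, revision FROM csetLog").fetchall())
# PRINTS [(1, 'aaa'), (2, 'bbb'), (3, 'ccc')]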
Example #10
class Python(object):

    def __init__(self, name, config):
        config = wrap(config)
        if config.debug.logs:
            Log.error("not allowed to configure logging on other process")

        self.process = Process(name, [PYTHON, "mo_threads" + os.sep + "python_worker.py"], shell=True)
        self.process.stdin.add(value2json(set_default({"debug": {"trace": True}}, config)))

        self.lock = Lock("wait for response from "+name)
        self.current_task = None
        self.current_response = None
        self.current_error = None

        self.daemon = Thread.run("", self._daemon)
        self.errors = Thread.run("", self._stderr)

    def _execute(self, command):
        with self.lock:
            if self.current_task is not None:
                self.current_task.wait()
            self.current_task = Signal()
            self.current_response = None
            self.current_error = None
        self.process.stdin.add(value2json(command))
        self.current_task.wait()
        with self.lock:
            try:
                if self.current_error:
                    Log.error("problem with process call", cause=Except.new_instance(self.current_error))
                else:
                    return self.current_response
            finally:
                self.current_task = None
                self.current_response = None
                self.current_error = None

    def _daemon(self, please_stop):
        while not please_stop:
            line = self.process.stdout.pop(till=please_stop)
            if line == THREAD_STOP:
                break
            try:
                data = json2value(line.decode('utf8'))
                if "log" in data:
                    Log.main_log.write(*data.log)
                elif "out" in data:
                    with self.lock:
                        self.current_response = data.out
                        self.current_task.go()
                elif "err" in data:
                    with self.lock:
                        self.current_error = data.err
                        self.current_task.go()
            except Exception:
                Log.note("non-json line: {{line}}", line=line)
        DEBUG and Log.note("stdout reader is done")

    def _stderr(self, please_stop):
        while not please_stop:
            try:
                line = self.process.stderr.pop(till=please_stop)
                if line == THREAD_STOP:
                    please_stop.go()
                    break
                Log.note("Error line from {{name}}({{pid}}): {{line}}", line=line, name=self.process.name, pid=self.process.pid)
            except Exception as e:
                Log.error("could not process line", cause=e)

    def import_module(self, module_name, var_names=None):
        if var_names is None:
            self._execute({"import": module_name})
        else:
            self._execute({"import": {"from": module_name, "vars": var_names}})

    def set(self, var_name, value):
        self._execute({"set": {var_name, value}})

    def get(self, var_name):
        return self._execute({"get": var_name})

    def execute_script(self, script):
        return self._execute({"exec": script})

    def __getattr__(self, item):
        def output(*args, **kwargs):
            if args:
                if kwargs:
                    Log.error("Not allowed to use both args and kwargs")
                return self._execute({item: args})
            else:
                return self._execute({item: kwargs})
        return output

    def stop(self):
        self._execute({"stop": {}})
        self.process.join()
        self.daemon.stop()
        self.errors.stop()
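The `Python` class above is a proxy: each call is serialized to JSON, written to the worker's stdin, and the `_daemon` thread matches the `out` or `err` reply to the pending `current_task` Signal. A hedged usage sketch; the `math`/`sqrt` names are illustrative and assume `python_worker.py` dispatches attribute commands of the form `{name: args}`:

worker = Python("worker", {})
worker.import_module("math", var_names=["sqrt"])
result = worker.sqrt(2)  # ROUTED THROUGH __getattr__ AS {"sqrt": (2,)}
worker.stop()            # SENDS {"stop": {}}, JOINS THE PROCESS AND READER THREADS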
Example #11
class PersistentQueue(object):
    """
    THREAD-SAFE, PERSISTENT QUEUE

    CAN HANDLE MANY PRODUCERS, BUT THE pop(), commit() IDIOM CAN HANDLE ONLY
    ONE CONSUMER.

    IT IS IMPORTANT YOU commit() or close(), OTHERWISE NOTHING COMES OFF THE QUEUE
    """
    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " +
                         self.file.name)
        self.please_stop = Signal()
        self.db = Data()
        self.pending = []

        if self.file.exists:
            for line in self.file:
                with suppress_exception:
                    delta = json2value(line)
                    apply_delta(self.db, delta)
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                with suppress_exception:
                    if k != "status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
                # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
            if lost:
                Log.warning("queue file had {{num}} items lost", num=lost)

            DEBUG and Log.note(
                "Persistent queue {{name}} found with {{num}} items",
                name=self.file.abspath,
                num=len(self))
        else:
            self.db.status = Data(start=0, end=0)
            self.start = self.db.status.start
            DEBUG and Log.note("New persistent queue {{name}}",
                               name=self.file.abspath)

    def _add_pending(self, delta):
        delta = to_data(delta)
        self.pending.append(delta)

    def _apply_pending(self):
        for delta in self.pending:
            apply_delta(self.db, delta)
        self.pending = []

    def __iter__(self):
        """
        BLOCKING ITERATOR
        """
        while not self.please_stop:
            try:
                value = self.pop()
                if value is not THREAD_STOP:
                    yield value
            except Exception as e:
                Log.warning("Tell me about what happened here", cause=e)

    def add(self, value):
        with self.lock:
            if self.closed:
                Log.error("Queue is closed")

            if value is THREAD_STOP:
                DEBUG and Log.note("Stop is seen in persistent queue")
                self.please_stop.go()
                return

            self._add_pending({"add": {str(self.db.status.end): value}})
            self.db.status.end += 1
            self._add_pending({"add": {"status.end": self.db.status.end}})
            self._commit()
        return self

    def __len__(self):
        with self.lock:
            return self.db.status.end - self.start

    def __getitem__(self, item):
        return self.db[str(item + self.start)]

    def pop(self, timeout=None):
        """
        :param timeout: OPTIONAL DURATION
        :return: None, IF timeout PASSES
        """
        with self.lock:
            while not self.please_stop:
                if self.db.status.end > self.start:
                    value = self.db[str(self.start)]
                    self.start += 1
                    return value

                if timeout is not None:
                    with suppress_exception:
                        self.lock.wait(timeout=timeout)
                        if self.db.status.end <= self.start:
                            return None
                else:
                    self.lock.wait()

            DEBUG and Log.note("persistent queue already stopped")
            return THREAD_STOP

    def pop_all(self):
        """
        NON-BLOCKING POP ALL IN QUEUE, IF ANY
        """
        with self.lock:
            if self.please_stop:
                return [THREAD_STOP]
            if self.db.status.end == self.start:
                return []

            output = []
            for i in range(self.start, self.db.status.end):
                output.append(self.db[str(i)])

            self.start = self.db.status.end
            return output

    def rollback(self):
        with self.lock:
            if self.closed:
                return
            self.start = self.db.status.start
            self.pending = []

    def commit(self):
        with self.lock:
            if self.closed:
                Log.error("Queue is closed, commit not allowed")

            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})

                if self.db.status.end - self.start < 10 or randoms.range(
                        0, 1000) == 0:  # FORCE RE-WRITE TO LIMIT FILE SIZE
                    # SIMPLY RE-WRITE FILE
                    if DEBUG:
                        Log.note(
                            "Re-write {{num_keys}} keys to persistent queue",
                            num_keys=self.db.status.end - self.start)
                        for k in self.db.keys():
                            if k == "status" or int(k) >= self.db.status.start:
                                continue
                            Log.error("Not expecting {{key}}", key=k)
                    self._commit()
                    self.file.write(value2json({"add": self.db}) + "\n")
                else:
                    self._commit()
            except Exception:
                raise

    def _commit(self):
        self.file.append("\n".join(value2json(p) for p in self.pending))
        self._apply_pending()

    def close(self):
        self.please_stop.go()
        with self.lock:
            if self.db is None:
                return

            self.add(THREAD_STOP)

            if self.db.status.end == self.start:
                DEBUG and Log.note("persistent queue clear and closed")
                self.file.delete()
            else:
                DEBUG and Log.note(
                    "persistent queue closed with {{num}} items left",
                    num=len(self))
                try:
                    self._add_pending({"add": {"status.start": self.start}})
                    for i in range(self.db.status.start, self.start):
                        self._add_pending({"remove": str(i)})
                    self.file.write(
                        value2json({"add": self.db}) + "\n" +
                        ("\n".join(value2json(p)
                                   for p in self.pending)) + "\n")
                    self._apply_pending()
                except Exception:
                    raise
            self.db = None

    @property
    def closed(self):
        with self.lock:
            return self.db is None
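The docstring's warning about `commit()` is the crash-safety contract: `pop()` only advances the in-memory `start`, and `commit()` persists it, so items popped but never committed are replayed after a restart. A minimal consumer sketch, assuming this class is importable; the file name and `handle` are illustrative:

queue = PersistentQueue("data/queue.json")
queue.add({"task": 1})
queue.add({"task": 2})

while len(queue):
    work = queue.pop()
    handle(work)    # HYPOTHETICAL HANDLER; MAY CRASH BEFORE COMMIT
    queue.commit()  # PERSIST PROGRESS; UNCOMMITTED ITEMS REPLAY ON RESTART
queue.close()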
Example #12
class PersistentQueue(object):
    """
    THREAD-SAFE, PERSISTENT QUEUE

    CAN HANDLE MANY PRODUCERS, BUT THE pop(), commit() IDIOM CAN HANDLE ONLY
    ONE CONSUMER.

    IT IS IMPORTANT YOU commit() or close(), OTHERWISE NOTHING COMES OFF THE QUEUE
    """

    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " + self.file.name)
        self.please_stop = Signal()
        self.db = Data()
        self.pending = []

        if self.file.exists:
            for line in self.file:
                with suppress_exception:
                    delta = mo_json.json2value(line)
                    apply_delta(self.db, delta)
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                with suppress_exception:
                    if k!="status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
                  # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
            if lost:
                Log.warning("queue file had {{num}} items lost",  num= lost)

            DEBUG and Log.note("Persistent queue {{name}} found with {{num}} items", name=self.file.abspath, num=len(self))
        else:
            self.db.status = Data(
                start=0,
                end=0
            )
            self.start = self.db.status.start
            DEBUG and Log.note("New persistent queue {{name}}", name=self.file.abspath)

    def _add_pending(self, delta):
        delta = wrap(delta)
        self.pending.append(delta)

    def _apply_pending(self):
        for delta in self.pending:
            apply_delta(self.db, delta)
        self.pending = []


    def __iter__(self):
        """
        BLOCKING ITERATOR
        """
        while not self.please_stop:
            try:
                value = self.pop()
                if value is not THREAD_STOP:
                    yield value
            except Exception as e:
                Log.warning("Tell me about what happened here", cause=e)

    def add(self, value):
        with self.lock:
            if self.closed:
                Log.error("Queue is closed")

            if value is THREAD_STOP:
                DEBUG and Log.note("Stop is seen in persistent queue")
                self.please_stop.go()
                return

            self._add_pending({"add": {str(self.db.status.end): value}})
            self.db.status.end += 1
            self._add_pending({"add": {"status.end": self.db.status.end}})
            self._commit()
        return self

    def __len__(self):
        with self.lock:
            return self.db.status.end - self.start

    def __getitem__(self, item):
        return self.db[str(item + self.start)]

    def pop(self, timeout=None):
        """
        :param timeout: OPTIONAL DURATION
        :return: None, IF timeout PASSES
        """
        with self.lock:
            while not self.please_stop:
                if self.db.status.end > self.start:
                    value = self.db[str(self.start)]
                    self.start += 1
                    return value

                if timeout is not None:
                    with suppress_exception:
                        self.lock.wait(timeout=timeout)
                        if self.db.status.end <= self.start:
                            return None
                else:
                    self.lock.wait()

            DEBUG and Log.note("persistent queue already stopped")
            return THREAD_STOP

    def pop_all(self):
        """
        NON-BLOCKING POP ALL IN QUEUE, IF ANY
        """
        with self.lock:
            if self.please_stop:
                return [THREAD_STOP]
            if self.db.status.end == self.start:
                return []

            output = []
            for i in range(self.start, self.db.status.end):
                output.append(self.db[str(i)])

            self.start = self.db.status.end
            return output

    def rollback(self):
        with self.lock:
            if self.closed:
                return
            self.start = self.db.status.start
            self.pending = []

    def commit(self):
        with self.lock:
            if self.closed:
                Log.error("Queue is closed, commit not allowed")

            self._add_pending({"add": {"status.start": self.start}})
            for i in range(self.db.status.start, self.start):
                self._add_pending({"remove": str(i)})

            if self.db.status.end - self.start < 10 or Random.range(0, 1000) == 0:  # FORCE RE-WRITE TO LIMIT FILE SIZE
                # SIMPLY RE-WRITE FILE
                if DEBUG:
                    Log.note("Re-write {{num_keys}} keys to persistent queue", num_keys=self.db.status.end - self.start)
                    for k in self.db.keys():
                        if k == "status" or int(k) >= self.db.status.start:
                            continue
                        Log.error("Not expecting {{key}}", key=k)
                self._commit()
                self.file.write(mo_json.value2json({"add": self.db}) + "\n")
            else:
                self._commit()

    def _commit(self):
        self.file.append("\n".join(mo_json.value2json(p) for p in self.pending))
        self._apply_pending()

    def close(self):
        self.please_stop.go()
        with self.lock:
            if self.db is None:
                return

            self.add(THREAD_STOP)

            if self.db.status.end == self.start:
                DEBUG and Log.note("persistent queue clear and closed")
                self.file.delete()
            else:
                DEBUG and Log.note("persistent queue closed with {{num}} items left", num=len(self))
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})
                self.file.write(mo_json.value2json({"add": self.db}) + "\n" + ("\n".join(mo_json.value2json(p) for p in self.pending)) + "\n")
                self._apply_pending()
            self.db = None

    @property
    def closed(self):
        with self.lock:
            return self.db is None
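
A minimal usage sketch of the single-consumer pop()/commit() idiom the docstring calls out; the file name and the process() worker are hypothetical:

# SKETCH ONLY: ASSUMES THE PersistentQueue ABOVE; process() IS HYPOTHETICAL
queue = PersistentQueue("my_queue.journal")
queue.add("task 1")
queue.add("task 2")
try:
    value = queue.pop()   # TAKEN, BUT NOT YET DURABLE
    process(value)
    queue.commit()        # RECORD PROGRESS IN THE JOURNAL
except Exception:
    queue.rollback()      # UNDO pop(); THE ITEM WILL BE SEEN AGAIN
finally:
    queue.close()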
Example #13
class Queue(object):
    """
     SIMPLE MESSAGE QUEUE, multiprocessing.Queue REQUIRES SERIALIZATION, WHICH
     IS DIFFICULT TO USE JUST BETWEEN THREADS
    """
    def __init__(self,
                 name,
                 max=None,
                 silent=False,
                 unique=False,
                 allow_add_after_close=False):
        """
        max - LIMIT THE NUMBER IN THE QUEUE, IF TOO MANY add() AND extend() WILL BLOCK
        silent - DO NOT COMPLAIN IF THE READERS ARE TOO SLOW
        unique - SET True IF YOU WANT ONLY ONE INSTANCE IN THE QUEUE AT A TIME
        """
        if not _Log:
            _late_import()

        self.name = name
        self.max = coalesce(max, 2**10)
        self.silent = silent
        self.allow_add_after_close = allow_add_after_close
        self.unique = unique
        self.please_stop = Signal("stop signal for " + name)
        self.lock = Lock("lock for queue " + name)
        self.queue = deque()
        self.next_warning = time()  # FOR DEBUGGING

    def __iter__(self):
        try:
            while True:
                value = self.pop(self.please_stop)
                if value is THREAD_STOP:
                    break
                if value is not None:
                    yield value
        except Exception as e:
            _Log.warning("Tell me about what happened here", e)

        if not self.silent:
            _Log.note("queue iterator is done")

    def add(self, value, timeout=None):
        with self.lock:
            if value is THREAD_STOP:
                # INSIDE THE lock SO THAT EXITING WILL RELEASE wait()
                self.queue.append(value)
                self.please_stop.go()
                return

            self._wait_for_queue_space(timeout=timeout)
            if self.please_stop and not self.allow_add_after_close:
                _Log.error("Do not add to closed queue")
            else:
                if self.unique:
                    if value not in self.queue:
                        self.queue.append(value)
                else:
                    self.queue.append(value)
        return self

    def push(self, value):
        """
        SNEAK value TO FRONT OF THE QUEUE
        """
        if self.please_stop and not self.allow_add_after_close:
            _Log.error("Do not push to closed queue")

        with self.lock:
            self._wait_for_queue_space()
            if not self.please_stop:
                self.queue.appendleft(value)
        return self

    def pop_message(self, till=None):
        """
        RETURN TUPLE (message, payload) CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE
        DUMMY IMPLEMENTATION FOR DEBUGGING
        """

        if till is not None and not isinstance(till, Signal):
            _Log.error("Expecting a signal")
        return Null, self.pop(till=till)

    def extend(self, values):
        if self.please_stop and not self.allow_add_after_close:
            _Log.error("Do not push to closed queue")

        with self.lock:
            # ONCE THE queue IS BELOW LIMIT, ALLOW ADDING MORE
            self._wait_for_queue_space()
            if not self.please_stop:
                if self.unique:
                    for v in values:
                        if v is THREAD_STOP:
                            self.please_stop.go()
                            continue
                        if v not in self.queue:
                            self.queue.append(v)
                else:
                    for v in values:
                        if v is THREAD_STOP:
                            self.please_stop.go()
                            continue
                        self.queue.append(v)
        return self

    def _wait_for_queue_space(self, timeout=DEFAULT_WAIT_TIME):
        """
        EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE
        """
        wait_time = 5

        if DEBUG and len(self.queue) > 1 * 1000 * 1000:
            _Log.warning("Queue {{name}} has over a million items", name=self.name)

        now = time()
        if timeout != None:
            time_to_stop_waiting = now + timeout
        else:
            time_to_stop_waiting = Null

        if self.next_warning < now:
            self.next_warning = now + wait_time

        while not self.please_stop and len(self.queue) >= self.max:
            if now > time_to_stop_waiting:
                if not _Log:
                    _late_import()
                _Log.error(THREAD_TIMEOUT)

            if self.silent:
                self.lock.wait(Till(till=time_to_stop_waiting))
            else:
                self.lock.wait(Till(timeout=wait_time))
                if len(self.queue) >= self.max:
                    now = time()
                    if self.next_warning < now:
                        self.next_warning = now + wait_time
                        _Log.alert(
                            "Queue by name of {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
                            name=self.name,
                            num=len(self.queue),
                            wait_time=wait_time)

    def __len__(self):
        with self.lock:
            return len(self.queue)

    def __nonzero__(self):
        with self.lock:
            return any(r != THREAD_STOP for r in self.queue)

    def pop(self, till=None):
        """
        WAIT FOR NEXT ITEM ON THE QUEUE
        RETURN THREAD_STOP IF QUEUE IS CLOSED
        RETURN None IF till IS REACHED AND QUEUE IS STILL EMPTY

        :param till:  A `Signal` to stop waiting and return None
        :return:  A value, or a THREAD_STOP or None
        """
        if till is not None and not isinstance(till, Signal):
            _Log.error("expecting a signal")

        with self.lock:
            while True:
                if self.queue:
                    value = self.queue.popleft()
                    return value
                if self.please_stop:
                    break
                if not self.lock.wait(till=till | self.please_stop):
                    if self.please_stop:
                        break
                    return None
        if DEBUG or not self.silent:
            _Log.note(self.name + " queue stopped")
        return THREAD_STOP

    def pop_all(self):
        """
        NON-BLOCKING POP ALL IN QUEUE, IF ANY
        """
        with self.lock:
            output = list(self.queue)
            self.queue.clear()

        return output

    def pop_one(self):
        """
        NON-BLOCKING POP IN QUEUE, IF ANY
        """
        with self.lock:
            if self.please_stop:
                return [THREAD_STOP]
            elif not self.queue:
                return None
            else:
                v = self.queue.pop()
                if v is THREAD_STOP:  # SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
                    self.please_stop.go()
                return v

    def close(self):
        with self.lock:
            self.please_stop.go()

    def commit(self):
        pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
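
A short sketch of this Queue shared between two threads, using the same Thread, THREAD_STOP and Log names these examples already use; worker() and its body are illustrative:

# SKETCH ONLY: ONE PRODUCER, ONE CONSUMER, CLOSED WITH THREAD_STOP
todo = Queue("work", max=100)

def worker(please_stop):
    # Queue.__iter__ BLOCKS ON pop() AND STOPS WHEN THREAD_STOP ARRIVES
    for value in todo:
        Log.note("got {{value}}", value=value)

consumer = Thread.run("consumer", worker)
for i in range(10):
    todo.add(i)
todo.add(THREAD_STOP)  # ALSO TRIGGERS todo.please_stop
consumer.join()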
Example #14
class Broker:
    @override
    def __init__(self, backing, database, kwargs=None):
        if backing.directory:
            self.backing = DirectoryBacking(kwargs=backing)
        else:
            self.backing = s3.Bucket(kwargs=backing)
        self.db = Sqlite(database)

        # ENSURE DATABASE IS SETUP
        if not self.db.about(VERSION_TABLE):
            schema.setup(self)
        self.next_id = id_generator(db=self.db, version_table=VERSION_TABLE)
        self.queues = []
        self.please_stop = Signal()
        self.cleaner = Thread.run("cleaner", self._cleaner)


    @override
    def get_or_create_queue(self, name, block_size_mb=8, kwargs=None):
        for q in self.queues:
            if q.name == name:
                return q

        # VERIFY QUEUE EXISTS
        with self.db.transaction() as t:
            result = t.query(
                sql_query({"from": QUEUE, "where": {"eq": {"name": name}}})
            )

            if not result.data:
                id = self.next_id()
                t.execute(
                    sql_insert(
                        table=QUEUE,
                        records={
                            "id": id,
                            "name": name,
                            "next_serial": 1,
                            "block_size_mb": block_size_mb,
                            "block_start": 1,
                            "block_end": 1,
                        },
                    )
                )
                t.execute(
                    sql_insert(
                        table=SUBSCRIBER,
                        records={
                            "id": id,
                            "queue": id,
                            "confirm_delay_seconds": 60,
                            "look_ahead_serial": 1000,
                            "last_confirmed_serial": 0,
                            "next_emit_serial": 1,
                            "last_emit_timestamp": Date.now()
                        },
                    )
                )

                output = Queue(id=id, broker=self, kwargs=kwargs)
            else:
                kwargs = first_row(result)
                output = Queue(broker=self, kwargs=kwargs)

        self.queues.append(output)
        return output

    def get_subscriber(self, name=None):
        """
        GET SUBSCRIBER BY id, OR BY QUEUE name
        """
        with self.db.transaction() as t:
            result = t.query(
                SQL(
                    f"""
                    SELECT
                        MIN(s.id) as id
                    FROM
                        {SUBSCRIBER} AS s
                    LEFT JOIN 
                        {QUEUE} as q on q.id = s.queue
                    WHERE
                        q.name = {quote_value(name)}
                    GROUP BY 
                        s.queue
                    """
                )
            )
            if not result:
                Log.error("not expected")

            queue = self.get_or_create_queue(name)
            sub_info = t.query(
                sql_query(
                    {"from": SUBSCRIBER, "where": {"eq": {"id": first_row(result).id}}}
                )
            )

            return Subscription(queue=queue, kwargs=first_row(sub_info))

    def replay(
        self,
        name,
        confirm_delay_seconds=60,
        next_emit_serial=1,
        look_ahead_serial=1000
    ):
        """
        A SUBSCRIBER FOR REVIEWING THE QUEUE CONTENTS, IN ORDER
        """
        queue = self.get_or_create_queue(name)
        id = self.next_id()

        with self.db.transaction() as t:
            t.execute(
                sql_insert(
                    table=SUBSCRIBER,
                    records={
                        "id": id,
                        "queue": queue.id,
                        "last_emit_timestamp": Date.now(),
                        "confirm_delay_seconds": confirm_delay_seconds,
                        "last_confirmed_serial": next_emit_serial - 1,
                        "next_emit_serial": next_emit_serial,
                        "look_ahead_serial": look_ahead_serial,
                    },
                )
            )

        return Subscription(
            id=id, queue=queue, confirm_delay_seconds=confirm_delay_seconds
        )

    def delete_queue(self, name):
        Log.error("You do not need to do this")

    def add_subscription(self, queue, serial_start=0):
        pass

    def clean(self):
        """
        REMOVE ANY RECORDS THAT ARE NOT NEEDED BY QUEUE OR SUBSCRIBERS
        """
        now = Date.now()

        # ANY BLOCKS TO FLUSH?
        with self.db.transaction() as t:
            result = t.query(
                SQL(
                    f"""
                    SELECT id, block_size_mb, block_start
                    FROM {QUEUE} AS q
                    JOIN {BLOCKS} AS b ON b.queue=q.id AND b.serial=q.block_start
                    WHERE b.last_used < {quote_value(now-Duration(WRITE_INTERVAL))}            
                    """
                )
            )

        for stale in rows(result):
            queue = first(q for q in self.queues if q.id == stale.id)
            queue._flush(**stale)

        # REMOVE UNREACHABLE MESSAGES
        conditions = []
        for q in self.queues:
            conditions.append(
                SQL(f"(queue = {quote_value(q.id)} AND serial IN (")
                + SQL(
                    f"""
                    SELECT m.serial
                    FROM {MESSAGES} AS m 
                    LEFT JOIN {UNCONFIRMED} as u ON u.serial = m.serial
                    LEFT JOIN {SUBSCRIBER} as s  ON s.queue = m.queue and s.id = u.subscriber 
                    LEFT JOIN {QUEUE} as q ON q.id = m.queue and m.serial >= q.block_start
                    LEFT JOIN {SUBSCRIBER} as la ON 
                        la.queue = m.queue AND 
                        la.last_confirmed_serial < m.serial AND 
                        m.serial < la.next_emit_serial+la.look_ahead_serial
                    WHERE 
                        m.queue = {q.id} AND
                        s.id IS NULL AND -- STILL UNCONFIRMED POP
                        q.id IS NULL AND -- NOT WRITTEN TO S3 YET
                        la.id IS NULL    -- NOT IN LOOK-AHEAD FOR SUBSCRIBER           
                    """
                )
                + SQL("))")
            )

        with self.db.transaction() as t:
            if DEBUG:
                result = t.query(
                    ConcatSQL(
                        SQL(f"SELECT count(1) AS `count` FROM {MESSAGES} WHERE "),
                        JoinSQL(SQL_OR, conditions),
                    )
                )
                Log.note(
                    "Delete {{num}} messages from database", num=first_row(result).count
                )

            t.execute(
                ConcatSQL(
                    SQL(f"DELETE FROM {MESSAGES} WHERE "), JoinSQL(SQL_OR, conditions)
                )
            )

    def close(self):
        self.please_stop.go()
        for q in self.queues:
            q.flush()
        self.db.close()

    def _cleaner(self, please_stop):
        while not please_stop:
            (
                please_stop | Till(seconds=Duration(WRITE_INTERVAL).total_seconds())
            ).wait()
            self.clean()
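
A hedged sketch of standing up the Broker; the directory and database names are hypothetical, and only methods shown above are used (the read/write calls on the resulting queue and subscription are outside this excerpt):

# SKETCH ONLY: SETTINGS NAMES FOLLOW THE kwargs PATTERN ABOVE
broker = Broker(
    backing={"directory": "queue_blocks"},  # ELSE AN S3 BUCKET CONFIG
    database="broker.sqlite"
)
queue = broker.get_or_create_queue("tasks")    # CREATES QUEUE + DEFAULT SUBSCRIBER
subscription = broker.get_subscriber("tasks")  # READ SIDE, CONFIRM-BASED
replayer = broker.replay("tasks")              # SECOND SUBSCRIBER, FROM SERIAL 1
...
broker.close()                                 # FLUSHES QUEUES, STOPS CLEANER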
Example #15
class Thread(object):
    """
    join() ENHANCED TO ALLOW CAPTURE OF CTRL-C, AND RETURN POSSIBLE THREAD EXCEPTIONS
    run() ENHANCED TO CAPTURE EXCEPTIONS
    """

    num_threads = 0

    def __init__(self, name, target, *args, **kwargs):
        self.id = -1
        self.name = name
        self.target = target
        self.end_of_thread = None
        self.synch_lock = Lock("response synch lock")
        self.args = args

        # ENSURE THERE IS A SHARED please_stop SIGNAL
        self.kwargs = copy(kwargs)
        self.kwargs["please_stop"] = self.kwargs.get(
            "please_stop", Signal("please_stop for " + self.name))
        self.please_stop = self.kwargs["please_stop"]

        self.thread = None
        self.stopped = Signal("stopped signal for " + self.name)
        self.cprofiler = None
        self.children = []

        if "parent_thread" in kwargs:
            del self.kwargs["parent_thread"]
            self.parent = kwargs["parent_thread"]
        else:
            self.parent = Thread.current()
            self.parent.add_child(self)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        if isinstance(value, BaseException):
            self.please_stop.go()

        # TODO: AFTER A WHILE START KILLING THREAD
        self.join()
        self.args = None
        self.kwargs = None

    def start(self):
        try:
            self.thread = start_new_thread(Thread._run, (self, ))
            return self
        except Exception as e:
            Log.error("Can not start thread", e)

    def stop(self):
        for c in copy(self.children):
            if DEBUG and c.name:
                Log.note("Stopping thread {{name|quote}}", name=c.name)
            c.stop()
        self.please_stop.go()

        if DEBUG:
            Log.note("Thread {{name|quote}} got request to stop",
                     name=self.name)

    def add_child(self, child):
        self.children.append(child)

    def remove_child(self, child):
        try:
            self.children.remove(child)
        except Exception as e:
            # happens when multiple joins on same thread
            pass

    def _run(self):
        with CProfiler():

            self.id = get_ident()
            with ALL_LOCK:
                ALL[self.id] = self

            try:
                if self.target is not None:
                    a, k, self.args, self.kwargs = self.args, self.kwargs, None, None
                    response = self.target(*a, **k)
                    with self.synch_lock:
                        self.end_of_thread = Data(response=response)
                else:
                    with self.synch_lock:
                        self.end_of_thread = Null
            except Exception as e:
                e = Except.wrap(e)
                with self.synch_lock:
                    self.end_of_thread = Data(exception=e)
                if self not in self.parent.children:
                    # THREAD FAILURES ARE A PROBLEM ONLY IF NO ONE WILL BE JOINING WITH IT
                    try:
                        Log.fatal("Problem in thread {{name|quote}}",
                                  name=self.name,
                                  cause=e)
                    except Exception:
                        sys.stderr.write(b"ERROR in thread: " +
                                         str(self.name) + b" " + str(e) +
                                         b"\n")
            finally:
                try:
                    children = copy(self.children)
                    for c in children:
                        try:
                            if DEBUG:
                                sys.stdout.write(b"Stopping thread " +
                                                 str(c.name) + b"\n")
                            c.stop()
                        except Exception as e:
                            Log.warning("Problem stopping thread {{thread}}",
                                        thread=c.name,
                                        cause=e)

                    for c in children:
                        try:
                            if DEBUG:
                                sys.stdout.write(b"Joining on thread " +
                                                 str(c.name) + b"\n")
                            c.join()
                        except Exception as e:
                            Log.warning("Problem joining thread {{thread}}",
                                        thread=c.name,
                                        cause=e)
                        finally:
                            if DEBUG:
                                sys.stdout.write(b"Joined on thread " +
                                                 str(c.name) + b"\n")

                    self.stopped.go()
                    if DEBUG:
                        Log.note("thread {{name|quote}} stopping",
                                 name=self.name)
                    del self.target, self.args, self.kwargs
                    with ALL_LOCK:
                        del ALL[self.id]

                except Exception as e:
                    if DEBUG:
                        Log.warning("problem with thread {{name|quote}}",
                                    cause=e,
                                    name=self.name)
                finally:
                    self.stopped.go()
                    if DEBUG:
                        Log.note("thread {{name|quote}} is done",
                                 name=self.name)

    def is_alive(self):
        return not self.stopped

    def join(self, till=None):
        """
        RETURN THE RESULT {"response":r, "exception":e} OF THE THREAD EXECUTION (INCLUDING EXCEPTION, IF EXISTS)
        """
        if self is Thread:
            Log.error("Thread.join() is not a valid call, use t.join()")

        children = copy(self.children)
        for c in children:
            c.join(till=till)

        if DEBUG:
            Log.note("{{parent|quote}} waiting on thread {{child|quote}}",
                     parent=Thread.current().name,
                     child=self.name)
        (self.stopped | till).wait()
        if self.stopped:
            self.parent.remove_child(self)
            if not self.end_of_thread.exception:
                return self.end_of_thread.response
            else:
                Log.error("Thread {{name|quote}} did not end well",
                          name=self.name,
                          cause=self.end_of_thread.exception)
        else:
            raise Except(type=THREAD_TIMEOUT)

    @staticmethod
    def run(name, target, *args, **kwargs):
        # ENSURE target HAS please_stop ARGUMENT
        if "please_stop" not in target.__code__.co_varnames:
            Log.error(
                "function must have please_stop argument for signalling emergency shutdown"
            )

        Thread.num_threads += 1

        output = Thread(name, target, *args, **kwargs)
        output.start()
        return output

    @staticmethod
    def wait_for_shutdown_signal(
        please_stop=False,  # ASSIGN SIGNAL TO STOP EARLY
        allow_exit=False,  # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
        wait_forever=True  # IGNORE CHILD THREADS, NEVER EXIT.  False -> IF NO CHILD THREADS LEFT, THEN EXIT
    ):
        """
        FOR USE BY PROCESSES NOT EXPECTED TO EVER COMPLETE UNTIL EXTERNAL
        SHUTDOWN IS REQUESTED

        SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"

        :param please_stop: ASSIGN SIGNAL TO STOP EARLY
        :param allow_exit: ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
        :param wait_forever: WHEN True, IGNORE CHILD THREADS AND NEVER EXIT; WHEN False, EXIT ONCE NO CHILD THREADS ARE LEFT
        """
        if not isinstance(please_stop, Signal):
            please_stop = Signal()

        please_stop.on_go(lambda: start_new_thread(_stop_main_thread, ()))

        self_thread = Thread.current()
        if self_thread != MAIN_THREAD:
            Log.error(
                "Only the main thread can sleep forever (waiting for KeyboardInterrupt)"
            )

        if not wait_forever:
            # TRIGGER SIGNAL WHEN ALL EXITING THREADS ARE DONE
            pending = copy(self_thread.children)
            all = AndSignals(please_stop, len(pending))
            for p in pending:
                p.stopped.on_go(all.done)

        try:
            if allow_exit:
                _wait_for_exit(please_stop)
            else:
                _wait_for_interrupt(please_stop)
        except (KeyboardInterrupt, SystemExit) as _:
            Log.alert("SIGINT Detected!  Stopping...")
        finally:
            please_stop.go()

    @staticmethod
    def current():
        id = get_ident()
        with ALL_LOCK:
            try:
                return ALL[id]
            except KeyError:
                return MAIN_THREAD
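
A sketch of the conventions this Thread class enforces: every target must accept please_stop, loops wake on composed signals, and a service parks its main thread in wait_for_shutdown_signal(); do_work() is hypothetical:

# SKETCH ONLY: A SERVICE LOOP ON TOP OF THE Thread CLASS ABOVE
def poller(please_stop):
    while not please_stop:
        do_work()  # HYPOTHETICAL
        (please_stop | Till(seconds=5)).wait()  # SLEEP, BUT WAKE EARLY ON STOP

worker = Thread.run("poller", poller)
Thread.wait_for_shutdown_signal(allow_exit=True)  # BLOCKS FOR Ctrl-C OR "exit"
worker.stop()
worker.join()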
Example #16
    def test_signal_is_boolean(self):
        a = Signal()
        self.assertEqual(bool(a), False)
        a.go()
        self.assertEqual(bool(a), True)
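
The test above relies on a Signal being falsey until go(); the same boolean protocol is what lets the other examples compose signals with |, roughly:

# SKETCH ONLY: COMPOSING SIGNALS, AS IN pop(till=till | please_stop) ABOVE
please_stop = Signal("stop")
deadline = Till(seconds=30)
(please_stop | deadline).wait()  # RETURNS WHEN EITHER GOES
if please_stop:
    Log.note("stopped by request, not by timeout")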
class SpotManager(object):
    @override
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(region_name=kwargs.aws.region,
                        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
                        aws_secret_access_key=unwrap(
                            kwargs.aws.aws_secret_access_key))
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.no_capacity = {}
        self.no_capacity_file = File(
            kwargs.price_file).parent / "no capacity.json"
        self.done_making_new_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(
            ("id", ))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(
            self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(
            Duration(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(
            Duration(self.settings.uptime.duration), Duration("5minute"))
        self.settings.max_percent_per_type = coalesce(
            self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()

    def update_spot_requests(self):
        spot_requests = self._get_managed_spot_requests()

        # ADD UP THE CURRENT REQUESTED INSTANCES
        all_instances = UniqueIndex("id", data=self._get_managed_instances())
        self.active = active = wrap([
            r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES
            | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN
        ])

        for a in active.copy():
            if a.status.code == "request-canceled-and-instance-running" and all_instances[
                    a.instance_id] == None:
                active.remove(a)

        used_budget = 0
        current_spending = 0
        for a in active:
            about = self.price_lookup[a.launch_specification.instance_type,
                                      a.launch_specification.placement]
            discount = coalesce(about.type.discount, 0)
            Log.note(
                "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
                id=a.id,
                type=a.launch_specification.instance_type,
                zone=a.launch_specification.placement,
                instance_id=a.instance_id,
                price=a.price - discount)
            used_budget += a.price - discount
            current_spending += coalesce(about.current_price,
                                         a.price) - discount

        Log.note(
            "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
            budget=used_budget,
            current=current_spending)

        remaining_budget = self.settings.budget - used_budget

        current_utility = coalesce(
            SUM(self.price_lookup[
                r.launch_specification.instance_type,
                r.launch_specification.placement].type.utility
                for r in active), 0)
        utility_required = self.instance_manager.required_utility(
            current_utility)
        net_new_utility = utility_required - current_utility

        Log.note(
            "have {{current_utility}} utility running; need {{need_utility}} more utility",
            current_utility=current_utility,
            need_utility=net_new_utility)

        if remaining_budget < 0:
            remaining_budget, net_new_utility = self.save_money(
                remaining_budget, net_new_utility)

        if net_new_utility < 0:
            if self.settings.allowed_overage:
                net_new_utility = mo_math.min(
                    net_new_utility +
                    self.settings.allowed_overage * utility_required, 0)

            net_new_utility = self.remove_instances(net_new_utility)

        if net_new_utility > 0:
            net_new_utility = mo_math.min(net_new_utility,
                                          self.settings.max_new_utility)
            net_new_utility, remaining_budget = self.add_instances(
                net_new_utility, remaining_budget)

        if net_new_utility > 0:
            Log.alert(
                "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour).  Remaining budget is ${{budget|round(decimal=2)}} ",
                num=net_new_utility,
                expected=self.settings.max_utility_price,
                budget=remaining_budget)

        # Give EC2 a chance to notice the new requests before tagging them.
        Till(seconds=3).wait()
        with self.net_new_locker:
            for req in self.net_new_spot_requests:
                req.add_tag("Name", self.settings.ec2.instance.name)

        Log.note("All requests for new utility have been made")
        self.done_making_new_spot_requests.go()

    def add_instances(self, net_new_utility, remaining_budget):
        prices = self.pricing()

        for p in prices:
            if net_new_utility <= 0 or remaining_budget <= 0:
                break

            if p.current_price == None:
                Log.note("{{type}} has no current price",
                         type=p.type.instance_type)
                continue

            if self.settings.utility[p.type.instance_type].blacklist or \
                p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones):
                Log.note("{{type}} in {{zone}} skipped due to blacklist",
                         type=p.type.instance_type,
                         zone=p.availability_zone)
                continue

            # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
            max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
            max_bid = mo_math.min(p.higher_price, max_acceptable_price,
                                  remaining_budget)
            min_bid = p.price_80

            if min_bid > max_acceptable_price:
                Log.note(
                    "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                    type=p.type.instance_type,
                    price=min_bid,
                    remaining=max_acceptable_price)
                continue
            elif min_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                    type=p.type.instance_type,
                    bid=min_bid,
                    remaining_budget=remaining_budget)
                continue
            elif min_bid > max_bid:
                Log.error("not expected")

            naive_number_needed = int(
                mo_math.round(float(net_new_utility) / float(p.type.utility),
                              decimal=0))
            limit_total = None
            if self.settings.max_percent_per_type < 1:
                current_count = sum(
                    1 for a in self.active
                    if a.launch_specification.instance_type ==
                    p.type.instance_type and a.launch_specification.placement
                    == p.availability_zone)
                all_count = sum(
                    1 for a in self.active
                    if a.launch_specification.placement == p.availability_zone)
                all_count = max(all_count, naive_number_needed)
                limit_total = int(
                    mo_math.floor(
                        (all_count * self.settings.max_percent_per_type -
                         current_count) /
                        (1 - self.settings.max_percent_per_type)))

            num = mo_math.min(naive_number_needed, limit_total,
                              self.settings.max_requests_per_type)
            if num < 0:
                Log.note(
                    "{{type}} is over {{limit|percent}} of instances, no more requested",
                    limit=self.settings.max_percent_per_type,
                    type=p.type.instance_type)
                continue
            elif num == 1:
                min_bid = mo_math.min(
                    mo_math.max(p.current_price * 1.1, min_bid),
                    max_acceptable_price)
                price_interval = 0
            else:
                price_interval = mo_math.min(min_bid / 10,
                                             (max_bid - min_bid) / (num - 1))
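            # ILLUSTRATIVE NUMBERS: WITH min_bid=0.10, max_bid=0.20, num=5:
            # price_interval = min(0.10/10, (0.20-0.10)/(5-1)) = 0.01, SO THE
            # FIVE REQUESTS BID 0.10, 0.11, 0.12, 0.13, 0.14 $/hour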

            for i in range(num):
                bid_per_machine = min_bid + (i * price_interval)
                if bid_per_machine < p.current_price:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        current_price=p.current_price)
                    continue
                if bid_per_machine - p.type.discount > remaining_budget:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        remaining=remaining_budget)
                    continue

                last_no_capacity_message = self.no_capacity.get(
                    p.type.instance_type, Null)
                if last_no_capacity_message > Date.now() - CAPACITY_NOT_AVAILABLE_RETRY:
                    Log.note(
                        "Did not bid on {{type}}: \"No capacity\" last seen at {{last_time|datetime}}",
                        type=p.type.instance_type,
                        last_time=last_no_capacity_message)
                    continue

                try:
                    if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1:
                        Log.error(
                            "Spot Manager can only request machine one-at-a-time"
                        )

                    new_requests = self._request_spot_instances(
                        price=bid_per_machine,
                        availability_zone_group=p.availability_zone,
                        instance_type=p.type.instance_type,
                        kwargs=copy(self.settings.ec2.request))
                    Log.note(
                        "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                        num=len(new_requests),
                        type=p.type.instance_type,
                        zone=p.availability_zone,
                        utility=p.type.utility,
                        price=bid_per_machine)
                    net_new_utility -= p.type.utility * len(new_requests)
                    remaining_budget -= (bid_per_machine -
                                         p.type.discount) * len(new_requests)
                    with self.net_new_locker:
                        for ii in new_requests:
                            self.net_new_spot_requests.add(ii)
                except Exception as e:
                    Log.warning(
                        "Request instance {{type}} failed because {{reason}}",
                        type=p.type.instance_type,
                        reason=e.message,
                        cause=e)

                    if "Max spot instance count exceeded" in e.message:
                        Log.note("No further spot requests will be attempted.")
                        return net_new_utility, remaining_budget

        return net_new_utility, remaining_budget

    def remove_instances(self, net_new_utility):
        instances = self.running_instances()

        # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE
        remove_list = []
        for acceptable_error in range(0, 8):
            remaining_utility = -net_new_utility
            remove_list = FlatList()
            for s in instances:
                utility = coalesce(s.markup.type.utility, 0)
                if utility <= remaining_utility + acceptable_error:
                    remove_list.append(s)
                    remaining_utility -= utility
            if remaining_utility <= 0:
                net_new_utility = -remaining_utility
                break

        if not remove_list:
            return net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.note("Shutdown {{instances}}", instances=remove_list.id)
        remove_threads = [
            (i, Thread.run("teardown for " + text(i.id),
                           self.instance_manager.teardown, i))
            for i in remove_list
        ]
        for i, t in remove_threads:
            try:
                t.join()
            except Exception as e:
                Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

        remove_spot_requests = remove_list.spot_instance_request_id

        # TERMINATE INSTANCES
        self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        self.ec2_conn.cancel_spot_instance_requests(
            request_ids=remove_spot_requests)

        return net_new_utility

    def running_instances(self):
        # FIND THE BIGGEST, MOST EXPENSIVE REQUESTS
        instances = self._get_managed_instances()
        for r in instances:
            try:
                r.markup = self.price_lookup[r.instance_type, r.placement]
            except Exception as e:
                Log.error("No pricing!!!", cause=e)
        instances = jx.sort(instances, [{
            "value": "markup.type.utility",
            "sort": -1
        }, {
            "value": "markup.estimated_value",
            "sort": 1
        }])
        return instances

    def save_money(self, remaining_budget, net_new_utility):
        remove_spot_requests = wrap([])

        # FIRST CANCEL THE PENDING REQUESTS
        if remaining_budget < 0:
            requests = self._get_managed_spot_requests()
            for r in requests:
                if r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN:
                    remove_spot_requests.append(r.id)
                    net_new_utility += self.settings.utility[
                        r.launch_specification.instance_type].utility
                    remaining_budget += r.price

        instances = jx.sort(self.running_instances(), "markup.estimated_value")

        remove_list = wrap([])
        for s in instances:
            if remaining_budget >= 0:
                break
            remove_list.append(s)
            net_new_utility += coalesce(s.markup.type.utility, 0)
            remaining_budget += coalesce(s.request.bid_price,
                                         s.markup.price_80,
                                         s.markup.current_price)

        if not remove_list:
            return remaining_budget, net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.warning("Shutdown {{instances}} to save money!",
                    instances=remove_list.id)
        if ALLOW_SHUTDOWN:
            for g, removals in jx.chunk(remove_list, size=20):
                for i, t in [(i,
                              Thread.run("teardown " + i.id,
                                         self.instance_manager.teardown,
                                         i,
                                         please_stop=False))
                             for i in removals]:
                    try:
                        t.join()
                    except Exception:
                        Log.note("Problem with shutdown of {{id}}", id=i.id)

            remove_spot_requests.extend(remove_list.spot_instance_request_id)

            # TERMINATE INSTANCES
            self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

            # TERMINATE SPOT REQUESTS
            self.ec2_conn.cancel_spot_instance_requests(
                request_ids=remove_spot_requests)
        return remaining_budget, net_new_utility

    @cache(duration=5 * SECOND)
    def _get_managed_spot_requests(self):
        output = wrap([
            datawrap(r)
            for r in self.ec2_conn.get_all_spot_instance_requests()
            if not r.tags.get("Name")
            or r.tags.get("Name").startswith(self.settings.ec2.instance.name)
        ])
        return output

    def _get_managed_instances(self):
        requests = UniqueIndex(["instance_id"],
                               data=self._get_managed_spot_requests().filter(
                                   lambda r: r.instance_id != None))
        reservations = self.ec2_conn.get_all_instances()

        output = []
        for res in reservations:
            for instance in res.instances:
                if instance.tags.get('Name', '').startswith(
                        self.settings.ec2.instance.name
                ) and instance._state.name == "running":
                    instance.request = requests[instance.id]
                    output.append(datawrap(instance))
        return wrap(output)

    def _start_life_cycle_watcher(self):
        failed_locker = Lock()
        failed_attempts = Data()

        def track_setup(
                instance_setup_function,
                request,
                instance,  # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
                utility,  # THE utility OBJECT FOUND IN CONFIG
                please_stop):
            try:
                instance_setup_function(instance, utility, please_stop)
                instance.add_tag(
                    "Name", self.settings.ec2.instance.name + " (running)")
                with self.net_new_locker:
                    self.net_new_spot_requests.remove(request.id)
            except Exception as e:
                e = Except.wrap(e)
                instance.add_tag("Name", "")
                with failed_locker:
                    failed_attempts[request.id] += [e]
                if "Can not setup unknown " in e:
                    Log.warning("Unexpected failure on startup",
                                instance_id=instance.id,
                                cause=e)
                elif ERROR_ON_CALL_TO_SETUP in e:
                    with failed_locker:
                        causes = failed_attempts[request.id]
                    if len(causes) > 2:
                        Log.warning("Problem with setup() of {{instance_id}}",
                                    instance_id=instance.id,
                                    cause=causes)
                else:
                    Log.warning("Unexpected failure on startup",
                                instance_id=instance.id,
                                cause=e)

        def life_cycle_watcher(please_stop):
            bad_requests = Data()
            setup_threads = []
            last_get = Date.now()
            setup_in_progress = set()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                instances = wrap({
                    i.id: i
                    for r in self.ec2_conn.get_all_instances()
                    for i in r.instances
                })
                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r) for i, r in [(instances[r.instance_id], r)
                                        for r in spot_requests]
                    if i.id and (not i.tags.get("Name") or i.tags.get(
                        "Name") == self.settings.ec2.instance.name +
                                 " (setup)") and i.id not in setup_in_progress
                    and i._state.name == "running"
                    and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]

                for i, r in please_setup:
                    if not time_to_stop_trying.get(i.id):
                        time_to_stop_trying[
                            i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                    if Date.now() > time_to_stop_trying[i.id]:
                        # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                        self.ec2_conn.terminate_instances(instance_ids=[i.id])
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                        Log.warning(
                            "Problem with setup of {{instance_id}}.  Time is up.  Instance TERMINATED!",
                            instance_id=i.id)
                        continue

                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            try:
                                self.ec2_conn.terminate_instances(
                                    instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error(
                                    "Can not setup unknown {{instance_id}} of type {{type}}",
                                    instance_id=i.id,
                                    type=i.instance_type)

                        i.markup = p
                        i.add_tag("Name",
                                  self.settings.ec2.instance.name + " (setup)")
                        setup_in_progress.add(i.id)
                        t = Thread.run("setup for " + text(i.id), track_setup,
                                       self.instance_manager.setup, r, i, p)
                        if SINGLE_THREAD_SETUP:
                            t.join()
                        setup_threads.append(t)
                    except Exception as e:
                        i.add_tag("Name", "")
                        Log.warning("Unexpected failure on startup",
                                    instance_id=i.id,
                                    cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([
                    r for r in spot_requests
                    if r.status.code in PENDING_STATUS_CODES
                ])
                give_up = wrap([
                    r for r in spot_requests
                    if (r.status.code in PROBABLY_NOT_FOR_A_WHILE
                        | TERMINATED_STATUS_CODES) and r.id not in bad_requests
                ])
                ignore = wrap([
                    r for r in spot_requests if r.status.code in MIGHT_HAPPEN
                ])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_making_new_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                # SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)

                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id", ), data=pending)
                        pending = pending | self.net_new_spot_requests

                    if give_up:
                        self.ec2_conn.cancel_spot_instance_requests(
                            request_ids=give_up.id)
                        Log.note(
                            "Cancelled spot requests {{spots}}, {{reasons}}",
                            spots=give_up.id,
                            reasons=give_up.status.code)

                        for g in give_up:
                            bad_requests[g.id] += 1
                            if g.id in self.net_new_spot_requests:
                                self.net_new_spot_requests.remove(g.id)
                                if g.status.code == "capacity-not-available":
                                    self.no_capacity[
                                        g.launch_specification.
                                        instance_type] = Date.now()
                                if g.status.code == "bad-parameters":
                                    self.no_capacity[
                                        g.launch_specification.
                                        instance_type] = Date.now()
                                    Log.warning(
                                        "bad parameters while requesting type {{type}}",
                                        type=g.launch_specification.
                                        instance_type)

                if not pending and self.done_making_new_spot_requests:
                    Log.note("No more pending spot requests")
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}",
                             pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            with Timer("Save no capacity to file"):
                table = [{
                    "instance_type": k,
                    "last_failure": v
                } for k, v in self.no_capacity.items()]
                self.no_capacity_file.write(value2json(table, pretty=True))

            # WAIT FOR SETUP TO COMPLETE
            for t in setup_threads:
                t.join()

            Log.note("life cycle watcher has stopped")

        # Log.warning("lifecycle watcher is disabled")
        timeout = Till(seconds=self.settings.run_interval.seconds - 60)
        self.watcher = Thread.run("lifecycle watcher",
                                  life_cycle_watcher,
                                  please_stop=timeout)

    def _get_valid_availability_zones(self):
        subnets = list(
            self.vpc_conn.get_all_subnets(subnet_ids=self.settings.ec2.request.
                                          network_interfaces.subnet_id))
        zones_with_interfaces = [s.availability_zone for s in subnets]

        if self.settings.availability_zone:
            # If they pass a list of zones, constrain it by zones we have an
            # interface for.
            return set(zones_with_interfaces) & set(
                listwrap(self.settings.availability_zone))
        else:
            # Otherwise, use all available zones.
            return zones_with_interfaces

    @override
    def _request_spot_instances(self, price, availability_zone_group,
                                instance_type, kwargs):
        kwargs.self = None
        kwargs.kwargs = None

        # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP
        if instance_type.startswith("m3."):
            kwargs.placement_group = None

        kwargs.network_interfaces = NetworkInterfaceCollection(
            *(NetworkInterfaceSpecification(**i)
              for i in listwrap(kwargs.network_interfaces)
              if self.vpc_conn.get_all_subnets(
                  subnet_ids=i.subnet_id,
                  filters={"availabilityZone": availability_zone_group})))

        if len(kwargs.network_interfaces) == 0:
            Log.error(
                "No network interface specifications found for {{availability_zone}}!",
                availability_zone=kwargs.availability_zone_group)

        block_device_map = BlockDeviceMapping()

        # GENERIC BLOCK DEVICE MAPPING
        for dev, dev_settings in kwargs.block_device_map.items():
            block_device_map[dev] = BlockDeviceType(delete_on_termination=True,
                                                    **dev_settings)
        kwargs.block_device_map = block_device_map

        # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping
        num_ephemeral_volumes = ephemeral_storage[instance_type]["num"]
        for i in range(num_ephemeral_volumes):
            letter = convert.ascii2char(98 + i)  # START AT "b"
            kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType(
                ephemeral_name='ephemeral' + text(i),
                delete_on_termination=True)

        if kwargs.expiration:
            kwargs.valid_until = (Date.now() +
                                  Duration(kwargs.expiration)).format(ISO8601)
            kwargs.expiration = None

        # ATTACH NEW EBS VOLUMES
        for i, drive in enumerate(self.settings.utility[instance_type].drives):
            letter = convert.ascii2char(98 + i + num_ephemeral_volumes)
            device = drive.device = coalesce(drive.device, "/dev/sd" + letter)
            d = drive.copy()
            d.path = None  # path AND device PROPERTIES ARE NOT ALLOWED IN BlockDeviceType
            d.device = None
            if d.size:
                kwargs.block_device_map[device] = BlockDeviceType(
                    delete_on_termination=True, **d)

        output = list(self.ec2_conn.request_spot_instances(**kwargs))
        return output
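
    # DEVICE-NAMING SKETCH: convert.ascii2char(98 + i) maps i = 0, 1, 2, ...
    # to "b", "c", "d", ..., so an instance type with two ephemeral volumes
    # and one EBS drive would be mapped roughly as (illustrative, not from
    # the source):
    #
    #     /dev/sdb -> ephemeral0
    #     /dev/sdc -> ephemeral1
    #     /dev/sdd -> EBS VOLUME (delete_on_termination=True)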

    def pricing(self):
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()
            now = Date.now()

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from":
                        prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {
                                    "coalesce": [{
                                        "rows": {
                                            "timestamp": 1
                                        }
                                    }, {
                                        "date": "eod"
                                    }]
                                },
                                "edges":
                                ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {
                                    "sub": {
                                        "timestamp":
                                        self.settings.uptime.duration.seconds
                                    }
                                }
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone", "instance_type", {
                            "name": "time",
                            "range": {
                                "min": "effective",
                                "max": "expire",
                                "mode": "inclusive"
                            },
                            "allowNulls": False,
                            "domain": {
                                "type": "time",
                                "min":
                                now.floor(HOUR) - self.settings.uptime.history,
                                "max": Date.now().floor(HOUR) + HOUR,
                                "interval": "hour"
                            }
                        }
                    ],
                    "select": [{
                        "value": "price",
                        "aggregate": "max"
                    }, {
                        "aggregate": "count"
                    }],
                    "where": {
                        "gt": {
                            "expire":
                            now.floor(HOUR) - self.settings.uptime.history
                        }
                    },
                    "window": [{
                        "name": "current_price",
                        "value": "rows.last.price",
                        "edges": ["availability_zone", "instance_type"],
                        "sort": "time"
                    }]
                }).data

                bid80 = jx.run({
                    "from":
                    ListContainer(name=None, data=hourly_pricing),
                    "edges": [{
                        "value": "availability_zone",
                        "allowNulls": False
                    }, {
                        "name": "type",
                        "value": "instance_type",
                        "allowNulls": False,
                        "domain": {
                            "type": "set",
                            "key": "instance_type",
                            "partitions": self.settings.utility
                        }
                    }],
                    "select": [{
                        "name":
                        "price_80",
                        "value":
                        "price",
                        "aggregate":
                        "percentile",
                        "percentile":
                        self.settings.uptime.bid_percentile
                    }, {
                        "name": "max_price",
                        "value": "price",
                        "aggregate": "max"
                    }, {
                        "aggregate": "count"
                    }, {
                        "value": "current_price",
                        "aggregate": "one"
                    }, {
                        "name": "all_price",
                        "value": "price",
                        "aggregate": "list"
                    }],
                    "window": [
                        {
                            "name": "estimated_value",
                            "value": {
                                "div": ["type.utility", "price_80"]
                            }
                        },
                        {
                            "name":
                            "higher_price",
                            "value":
                            lambda row, rownum, rows: find_higher(
                                row.all_price, row.price_80)
                        }  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.sort(bid80.values(), {
                    "value": "estimated_value",
                    "sort": -1
                })

                self.prices = wrap(output)
                self.price_lookup = UniqueIndex(
                    ("type.instance_type", "availability_zone"),
                    data=self.prices)
            return self.prices
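
    # HEDGED SKETCH: find_higher IS USED IN THE WINDOW ABOVE BUT NOT SHOWN
    # IN THIS EXCERPT; an implementation consistent with that use (the
    # cheapest price strictly above the percentile bid) could look like:
    #
    #     def find_higher(prices, bid):
    #         higher = [p for p in prices if p != None and p > bid]
    #         return min(higher) if higher else None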

    def _get_spot_prices_from_aws(self):
        with Timer("Read no capacity file"):
            try:
                # FILE IS LIST OF {instance_type, last_failure} OBJECTS
                content = self.no_capacity_file.read()
                self.no_capacity = dict(
                    (r.instance_type, r.last_failure)
                    for r in convert.json2value(
                        content, flexible=False, leaves=False))
            except Exception as e:
                self.no_capacity = {}

        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content,
                                           flexible=False,
                                           leaves=False)
            except Exception as e:
                cache = FlatList()

        cache = ListContainer(name=None, data=cache)
        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {
                "value": "timestamp",
                "aggregate": "max"
            }
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX(
                            [Date(most_recent),
                             Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note(
                            "get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at)

                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(
                                self.settings.product,
                                "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token)
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(wrap({
                                "availability_zone": p.availability_zone,
                                "instance_type": p.instance_type,
                                "price": p.price,
                                "product_description": p.product_description,
                                "region": p.region.name,
                                "timestamp": Date(p.timestamp).unix
                            }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(
                prices, {"gte": {
                    "timestamp": {
                        "date": "today-2day"
                    }
                }})

            def stream():  # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                yield "]"

            File(self.settings.price_file).write(stream())

        return ListContainer(name="prices", data=prices)
Example #18
0
# Intended to test exit behaviour from timeout, SIGINT (CTRL-C), or "exit"
###############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

from mo_logs import Log

from mo_threads import Thread, Signal
from mo_threads.till import Till

please_stop = Signal()


def timeout(please_stop):
    (Till(seconds=20) | please_stop).wait()
    please_stop.go()


Thread.run("timeout", target=timeout, please_stop=please_stop)

Log.note("you must type 'exit', and press Enter, or wait 20seconds")
Thread.wait_for_shutdown_signal(allow_exit=True, please_stop=please_stop)

if not please_stop:
    Log.note("'exit' detected")
else:
    Log.note("timeout detected")
please_stop.go()
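
# A reduced sketch of the signal-combination idiom above: `a | b` is itself
# a Signal that fires as soon as either input fires (assuming mo_threads
# semantics; names below are illustrative):

from mo_threads import Signal
from mo_threads.till import Till

done = Signal()
either = done | Till(seconds=5)
done.go()
either.wait()  # RETURNS IMMEDIATELY; NO NEED TO WAIT OUT THE 5 SECONDS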
Example #19
0
class Clogger:

    # Singleton of the look-ahead scanner Clogger
    SINGLE_CLOGGER = None

    def __new__(cls, *args, **kwargs):
        if cls.SINGLE_CLOGGER is None:
            cls.SINGLE_CLOGGER = object.__new__(cls)
        return cls.SINGLE_CLOGGER

    def __init__(self,
                 conn=None,
                 tuid_service=None,
                 start_workers=True,
                 new_table=False,
                 kwargs=None):
        try:
            self.config = kwargs
            self.conn = conn if conn else sql.Sql(self.config.database.name)
            self.hg_cache = HgMozillaOrg(
                kwargs=self.config.hg_cache,
                use_cache=True) if self.config.hg_cache else Null

            self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
                kwargs=self.config.tuid, conn=self.conn, clogger=self)
            self.rev_locker = Lock()
            self.working_locker = Lock()

            if new_table:
                with self.conn.transaction() as t:
                    t.execute("DROP TABLE IF EXISTS csetLog")

            self.init_db()
            self.next_revnum = coalesce(
                self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
            self.csets_todo_backwards = Queue(
                name="Clogger.csets_todo_backwards")
            self.deletions_todo = Queue(name="Clogger.deletions_todo")
            self.maintenance_signal = Signal(name="Clogger.maintenance_signal")

            if 'tuid' in self.config:
                self.config = self.config.tuid

            self.disable_backfilling = False
            self.disable_tipfilling = False
            self.disable_deletion = False
            self.disable_maintenance = False

            self.backfill_thread = None
            self.tipfill_thread = None
            self.deletion_thread = None
            self.maintenance_thread = None

            # Make sure we are filled before allowing queries
            numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
            if numrevs < MINIMUM_PERMANENT_CSETS:
                Log.note("Filling in csets to hold {{minim}} csets.",
                         minim=MINIMUM_PERMANENT_CSETS)
                oldest_rev = 'tip'
                with self.conn.transaction() as t:
                    tmp = t.query(
                        "SELECT min(revnum), revision FROM csetLog").data[0][1]
                    if tmp:
                        oldest_rev = tmp
                self._fill_in_range(MINIMUM_PERMANENT_CSETS - numrevs,
                                    oldest_rev,
                                    timestamp=False)

            Log.note("Table is filled with atleast {{minim}} entries.",
                     minim=MINIMUM_PERMANENT_CSETS)

            if start_workers:
                self.start_workers()
        except Exception as e:
            Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))

    def start_backfilling(self):
        if not self.backfill_thread:
            self.backfill_thread = Thread.run('clogger-backfill',
                                              self.fill_backward_with_list)

    def start_tipfilling(self):
        if not self.tipfill_thread:
            self.tipfill_thread = Thread.run('clogger-tip',
                                             self.fill_forward_continuous)

    def start_maintenance(self):
        if not self.maintenance_thread:
            self.maintenance_thread = Thread.run('clogger-maintenance',
                                                 self.csetLog_maintenance)

    def start_deleter(self):
        if not self.deletion_thread:
            self.deletion_thread = Thread.run('clogger-deleter',
                                              self.csetLog_deleter)

    def start_workers(self):
        self.start_tipfilling()
        self.start_backfilling()
        self.start_maintenance()
        self.start_deleter()
        Log.note("Started clogger workers.")

    def init_db(self):
        with self.conn.transaction() as t:
            t.execute('''
            CREATE TABLE IF NOT EXISTS csetLog (
                revnum         INTEGER PRIMARY KEY,
                revision       CHAR(12) NOT NULL,
                timestamp      INTEGER
            );''')

    def disable_all(self):
        self.disable_tipfilling = True
        self.disable_backfilling = True
        self.disable_maintenance = True
        self.disable_deletion = True

    def revnum(self):
        """
        :return: max revnum that was added
        """
        return coalesce(
            self.conn.get_one("SELECT max(revnum) as revnum FROM csetLog")[0],
            0)

    def get_tip(self, transaction):
        return transaction.get_one(
            "SELECT max(revnum) as revnum, revision FROM csetLog")

    def get_tail(self, transaction):
        return transaction.get_one(
            "SELECT min(revnum) as revnum, revision FROM csetLog")

    def _get_clog(self, clog_url):
        try:
            Log.note("Searching through changelog {{url}}", url=clog_url)
            clog_obj = http.get_json(clog_url, retry=RETRY)
            return clog_obj
        except Exception as e:
            Log.error(
                "Unexpected error getting changset-log for {{url}}: {{error}}",
                url=clog_url,
                error=e)

    def _get_one_revision(self, transaction, cset_entry):
        # Returns a single revision if it exists
        _, rev, _ = cset_entry
        return transaction.get_one(
            "SELECT revision FROM csetLog WHERE revision=?", (rev, ))

    def _get_one_revnum(self, transaction, rev):
        # Returns a single revnum if it exists
        return transaction.get_one(
            "SELECT revnum FROM csetLog WHERE revision=?", (rev, ))

    def _get_revnum_range(self, transaction, revnum1, revnum2):
        # Returns an inclusive range of revision numbers
        high_num = max(revnum1, revnum2)
        low_num = min(revnum1, revnum2)

        return transaction.query("SELECT revnum, revision FROM csetLog WHERE "
                                 "revnum >= " + str(low_num) +
                                 " AND revnum <= " + str(high_num)).data

    def recompute_table_revnums(self):
        '''
        Recomputes the revnums for the csetLog table
        by creating a new table, and copying csetLog to
        it. The INTEGER PRIMARY KEY in the temp table auto increments
        as rows are added.

        IMPORTANT: Only call this after acquiring the
                   lock `self.working_locker`.
        :return:
        '''
        with self.conn.transaction() as t:
            t.execute('''
            CREATE TABLE temp (
                revnum         INTEGER PRIMARY KEY,
                revision       CHAR(12) NOT NULL,
                timestamp      INTEGER
            );''')

            t.execute(
                "INSERT INTO temp (revision, timestamp) "
                "SELECT revision, timestamp FROM csetlog ORDER BY revnum ASC")

            t.execute("DROP TABLE csetLog;")
            t.execute("ALTER TABLE temp RENAME TO csetLog;")

    def check_for_maintenance(self):
        '''
        Returns True if the maintenance worker should be run now,
        and False otherwise.
        :return:
        '''
        numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
        Log.note("Number of csets in csetLog table: {{num}}", num=numrevs)
        if numrevs >= SIGNAL_MAINTENANCE_CSETS:
            return True
        return False

    def add_cset_entries(self,
                         ordered_rev_list,
                         timestamp=False,
                         number_forward=True):
        '''
        Adds a list of revisions to the table. Assumes ordered_rev_list is
        ordered the way changesets are found in the changelog; going forwards
        or backwards is handled by flipping the list.
        :param ordered_rev_list: Order given from changeset log searching.
        :param timestamp: If False, records are kept indefinitely,
                          but if holes exist: (delete, None, delete, None)
                          those deletes with Nones around them
                          will not be deleted.
        :param number_forward: If True, this function numbers the revision list
                         going forward from max(revnum); otherwise it goes
                         backwards from revnum, then adds X to all revnums and
                         self.next_revnum, where X is the length of ordered_rev_list
        :return:
        '''
        with self.conn.transaction() as t:
            current_min = t.get_one("SELECT min(revnum) FROM csetlog")[0]
            current_max = t.get_one("SELECT max(revnum) FROM csetlog")[0]
            if not current_min or not current_max:
                current_min = 0
                current_max = 0

            direction = -1
            start = current_min - 1
            if number_forward:
                direction = 1
                start = current_max + 1
                ordered_rev_list = ordered_rev_list[::-1]

            insert_list = [(start + direction * count, rev,
                            int(time.time()) if timestamp else -1)
                           for count, rev in enumerate(ordered_rev_list)]

            # In case of overlapping requests
            fmt_insert_list = []
            for cset_entry in insert_list:
                tmp = self._get_one_revision(t, cset_entry)
                if not tmp:
                    fmt_insert_list.append(cset_entry)

            for _, tmp_insert_list in jx.groupby(fmt_insert_list,
                                                 size=SQL_CSET_BATCH_SIZE):
                t.execute(
                    "INSERT INTO csetLog (revnum, revision, timestamp)" +
                    " VALUES " + sql_list(
                        quote_set((revnum, revision, timestamp))
                        for revnum, revision, timestamp in tmp_insert_list))

            # Move the revision numbers forward if needed
            self.recompute_table_revnums()

        # Start a maintenance run if needed
        if self.check_for_maintenance():
            Log.note("Scheduling maintenance run on clogger.")
            self.maintenance_signal.go()
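
    # NUMBERING SKETCH: the start/direction arithmetic above appends after
    # current_max when going forward and prepends before current_min when
    # going backward (pure-Python illustration of the insert_list revnums):
    #
    #     >>> start, direction = 10 + 1, 1    # number_forward=True, current_max=10
    #     >>> [start + direction * i for i in range(3)]
    #     [11, 12, 13]
    #     >>> start, direction = 5 - 1, -1    # number_forward=False, current_min=5
    #     >>> [start + direction * i for i in range(3)]
    #     [4, 3, 2]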

    def _fill_in_range(self,
                       parent_cset,
                       child_cset,
                       timestamp=False,
                       number_forward=True):
        '''
        Fills cset logs in a certain range. 'parent_cset' can be an int; in that
        case we gather that many changesets instead, treat the search as going
        backwards (the number_forward setting is ignored), and skip the first
        changeset of the first log. Otherwise, we continue until we find the
        given 'parent_cset'.
        :param parent_cset:
        :param child_cset:
        :param timestamp:
        :param number_forward:
        :return:
        '''
        csets_to_add = []
        found_parent = False
        find_parent = False
        if type(parent_cset) != int:
            find_parent = True
        elif parent_cset >= MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG:
            Log.warning(
                "Requested number of new changesets {{num}} is too high. "
                "Max number that can be requested is {{maxnum}}.",
                num=parent_cset,
                maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG)
            return None

        csets_found = 0
        clogs_seen = 0
        final_rev = child_cset
        while not found_parent and clogs_seen < MAX_BACKFILL_CLOGS:
            clog_url = str(HG_URL) + "/" + self.config.hg.branch + "/json-log/" + final_rev
            clog_obj = self._get_clog(clog_url)
            clog_csets_list = list(clog_obj['changesets'])
            for clog_cset in clog_csets_list[:-1]:
                if not number_forward and csets_found <= 0:
                    # Skip this entry, it already exists
                    csets_found += 1
                    continue

                nodes_cset = clog_cset['node'][:12]
                if find_parent:
                    if nodes_cset == parent_cset:
                        found_parent = True
                        if not number_forward:
                            # When going backwards this entry is
                            # the given parent
                            csets_to_add.append(nodes_cset)
                        break
                else:
                    if csets_found + 1 > parent_cset:
                        found_parent = True
                        if not number_forward:
                            # When going backwards this entry is
                            # the given parent (which is supposed
                            # to already exist)
                            csets_to_add.append(nodes_cset)
                        break
                    csets_found += 1
                csets_to_add.append(nodes_cset)
            if found_parent:
                break

            clogs_seen += 1
            final_rev = clog_csets_list[-1]['node'][:12]

        if found_parent:
            self.add_cset_entries(csets_to_add,
                                  timestamp=timestamp,
                                  number_forward=number_forward)
        else:
            Log.warning(
                "Couldn't find the end of the request for {{request}}. "
                "Max number that can be requested through _fill_in_range is {{maxnum}}.",
                request={
                    'parent_cset': parent_cset,
                    'child_cset': child_cset,
                    'number_forward': number_forward
                },
                maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG)
            return None
        return csets_to_add

    def initialize_to_range(self, old_rev, new_rev, delete_old=True):
        '''
        Used in service testing to get to very old
        changesets quickly.
        :param old_rev: The oldest revision to keep
        :param new_rev: The revision to start searching from
        :return:
        '''
        old_settings = [
            self.disable_tipfilling, self.disable_backfilling,
            self.disable_maintenance, self.disable_deletion
        ]
        self.disable_tipfilling = True
        self.disable_backfilling = True
        self.disable_maintenance = True
        self.disable_deletion = True

        old_rev = old_rev[:12]
        new_rev = new_rev[:12]

        with self.working_locker:
            if delete_old:
                with self.conn.transaction() as t:
                    t.execute("DELETE FROM csetLog")
            with self.conn.transaction() as t:
                t.execute("INSERT INTO csetLog (revision, timestamp) VALUES " +
                          quote_set((new_rev, -1)))
            self._fill_in_range(old_rev,
                                new_rev,
                                timestamp=True,
                                number_forward=False)

        self.disable_tipfilling = old_settings[0]
        self.disable_backfilling = old_settings[1]
        self.disable_maintenance = old_settings[2]
        self.disable_deletion = old_settings[3]

    def fill_backward_with_list(self, please_stop=None):
        '''
        Expects requests of the tuple form: (parent_cset, timestamp)
        parent_cset can be an int X to go back by X changesets, or
        a string to search for going backwards in time. If timestamp
        is false, no timestamps will be added to the entries.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                request = self.csets_todo_backwards.pop(till=please_stop)
                if please_stop:
                    break

                # If backfilling is disabled, all requests
                # are ignored.
                if self.disable_backfilling:
                    Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()
                    continue

                if request:
                    parent_cset, timestamp = request
                else:
                    continue

                with self.working_locker:
                    with self.conn.transaction() as t:
                        parent_revnum = self._get_one_revnum(t, parent_cset)
                    if parent_revnum:
                        continue

                    with self.conn.transaction() as t:
                        _, oldest_revision = self.get_tail(t)

                    self._fill_in_range(parent_cset,
                                        oldest_revision,
                                        timestamp=timestamp,
                                        number_forward=False)
                Log.note("Finished {{cset}}", cset=parent_cset)
            except Exception as e:
                Log.warning("Unknown error occurred during backfill: ",
                            cause=e)

    def update_tip(self):
        '''
        Returns False if the tip is already at the newest, or True
        if an update has taken place.
        :return:
        '''
        clog_obj = self._get_clog(
            str(HG_URL) + "/" + self.config.hg.branch + "/json-log/tip")

        # Get current tip in DB
        with self.conn.transaction() as t:
            _, newest_known_rev = self.get_tip(t)

        # If we are still at the newest, wait for CSET_TIP_WAIT_TIME seconds
        # before checking again.
        first_clog_entry = clog_obj['changesets'][0]['node'][:12]
        if newest_known_rev == first_clog_entry:
            return False

        csets_to_gather = None
        if not newest_known_rev:
            Log.note(
                "No revisions found in table, adding {{minim}} entries...",
                minim=MINIMUM_PERMANENT_CSETS)
            csets_to_gather = MINIMUM_PERMANENT_CSETS

        found_newest_known = False
        csets_to_add = []
        csets_found = 0
        clogs_seen = 0
        Log.note("Found new revisions. Updating csetLog tip to {{rev}}...",
                 rev=first_clog_entry)
        while not found_newest_known and clogs_seen < MAX_TIPFILL_CLOGS:
            clog_csets_list = list(clog_obj['changesets'])
            for clog_cset in clog_csets_list[:-1]:
                nodes_cset = clog_cset['node'][:12]
                if not csets_to_gather:
                    if nodes_cset == newest_known_rev:
                        found_newest_known = True
                        break
                else:
                    if csets_found >= csets_to_gather:
                        found_newest_known = True
                        break
                csets_found += 1
                csets_to_add.append(nodes_cset)
            if not found_newest_known:
                # Get the next page
                clogs_seen += 1
                final_rev = clog_csets_list[-1]['node'][:12]
                clog_url = str(HG_URL) + "/" + self.config.hg.branch + "/json-log/" + final_rev
                clog_obj = self._get_clog(clog_url)

        if clogs_seen >= MAX_TIPFILL_CLOGS:
            Log.error(
                "Too many changesets, can't find last tip or the number is too high: {{rev}}. "
                "Maximum possible to request is {{maxnum}}",
                rev=coalesce(newest_known_rev, csets_to_gather),
                maxnum=MAX_TIPFILL_CLOGS * CHANGESETS_PER_CLOG)
            return False

        with self.working_locker:
            Log.note("Adding {{csets}}", csets=csets_to_add)
            self.add_cset_entries(csets_to_add, timestamp=False)
        return True

    def fill_forward_continuous(self, please_stop=None):
        while not please_stop:
            try:
                while not please_stop and not self.disable_tipfilling and self.update_tip():
                    pass
                (please_stop | Till(seconds=CSET_TIP_WAIT_TIME)).wait()
            except Exception as e:
                Log.warning("Unknown error occurred during tip filling:",
                            cause=e)

    def csetLog_maintenance(self, please_stop=None):
        '''
        Handles deleting old csetLog entries, and timestamps
        revisions once they pass the permanent-storage length
        so they can be deleted later.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                # Wait until something signals the maintenance cycle
                # to begin (or end).
                (self.maintenance_signal | please_stop).wait()

                if please_stop:
                    break
                if self.disable_maintenance:
                    continue

                Log.warning(
                    "Starting clog maintenance. Since this doesn't start often, "
                    "we need to explicitly see when it's started with this warning."
                )

                # Reset signal so we don't request
                # maintenance infinitely.
                with self.maintenance_signal.lock:
                    self.maintenance_signal._go = False

                with self.working_locker:
                    all_data = None
                    with self.conn.transaction() as t:
                        all_data = sorted(
                            t.get("SELECT revnum, revision, timestamp FROM csetLog"),
                            key=lambda x: int(x[0]))

                    # Restore maximum permanents (if overflowing)
                    new_data = []
                    modified = False
                    for count, (revnum, revision,
                                timestamp) in enumerate(all_data[::-1]):
                        if count < MINIMUM_PERMANENT_CSETS:
                            if timestamp != -1:
                                modified = True
                                new_data.append((revnum, revision, -1))
                            else:
                                new_data.append((revnum, revision, timestamp))
                        elif type(timestamp) != int or timestamp == -1:
                            modified = True
                            new_data.append(
                                (revnum, revision, int(time.time())))
                        else:
                            new_data.append((revnum, revision, timestamp))

                    # Delete annotations at revisions with timestamps
                    # that are too old. The csetLog entries will have
                    # their timestamps reset here.
                    new_data1 = []
                    annrevs_to_del = []
                    current_time = time.time()
                    for count, (revnum, revision,
                                timestamp) in enumerate(new_data[::-1]):
                        new_timestamp = timestamp
                        if timestamp != -1:
                            if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
                                modified = True
                                new_timestamp = current_time
                                annrevs_to_del.append(revision)
                        new_data1.append((revnum, revision, new_timestamp))

                    if len(annrevs_to_del) > 0:
                        # Delete any latestFileMod and annotation entries
                        # that are too old.
                        Log.note(
                            "Deleting annotations and latestFileMod for revisions for being "
                            "older than {{oldest}}: {{revisions}}",
                            oldest=TIME_TO_KEEP_ANNOTATIONS,
                            revisions=annrevs_to_del)
                        with self.conn.transaction() as t:
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN "
                                + quote_set(annrevs_to_del))
                            t.execute(
                                "DELETE FROM annotations WHERE revision IN " +
                                quote_set(annrevs_to_del))

                    # Delete any overflowing entries
                    new_data2 = new_data1
                    reved_all_data = all_data[::-1]
                    deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
                    delete_overflowing_revstart = None
                    if len(deleted_data) > 0:
                        _, delete_overflowing_revstart, _ = deleted_data[0]
                        new_data2 = set(all_data) - set(deleted_data)

                        # Update old frontiers if requested, otherwise
                        # they will all get deleted by the csetLog_deleter
                        # worker
                        if UPDATE_VERY_OLD_FRONTIERS:
                            _, max_revision, _ = all_data[-1]
                            for _, revision, _ in deleted_data:
                                with self.conn.transaction() as t:
                                    old_files = t.get(
                                        "SELECT file FROM latestFileMod WHERE revision=?",
                                        (revision, ))
                                if old_files is None or len(old_files) <= 0:
                                    continue

                                self.tuid_service.get_tuids_from_files(
                                    old_files,
                                    max_revision,
                                    going_forward=True,
                                )

                                still_exist = True
                                while still_exist and not please_stop:
                                    Till(seconds=TUID_EXISTENCE_WAIT_TIME).wait()
                                    with self.conn.transaction() as t:
                                        old_files = t.get(
                                            "SELECT file FROM latestFileMod WHERE revision=?",
                                            (revision, ))
                                    if old_files is None or len(old_files) <= 0:
                                        still_exist = False

                    # Update table and schedule a deletion
                    if modified:
                        with self.conn.transaction() as t:
                            insert_into_db_chunked(
                                t, new_data2,
                                "INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES "
                            )
                    if not deleted_data:
                        continue

                    Log.note("Scheduling {{num_csets}} for deletion",
                             num_csets=len(deleted_data))
                    self.deletions_todo.add(delete_overflowing_revstart)
            except Exception as e:
                Log.warning(
                    "Unexpected error occured while maintaining csetLog, continuing to try: ",
                    cause=e)
        return

    def csetLog_deleter(self, please_stop=None):
        '''
        Deletes changesets from the csetLog table
        and also changesets from the annotation table
        that have revisions matching the given changesets.
        Accepts lists of csets from self.deletions_todo.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                request = self.deletions_todo.pop(till=please_stop)
                if please_stop:
                    break

                # If deletion is disabled, ignore the current
                # request - it will need to be re-requested.
                if self.disable_deletion:
                    Till(seconds=CSET_DELETION_WAIT_TIME).wait()
                    continue

                with self.working_locker:
                    first_cset = request

                    # Since we are deleting and moving stuff around in the
                    # TUID tables, we need everything to be contained in
                    # one transaction with no interruptions.
                    with self.conn.transaction() as t:
                        revnum = self._get_one_revnum(t, first_cset)[0]
                        csets_to_del = t.get(
                            "SELECT revnum, revision FROM csetLog WHERE revnum <= ?",
                            (revnum, ))
                        csets_to_del = [cset for _, cset in csets_to_del]
                        existing_frontiers = t.query(
                            "SELECT revision FROM latestFileMod WHERE revision IN "
                            + quote_set(csets_to_del)).data

                        existing_frontiers = [row[0] for row in existing_frontiers]
                        Log.note(
                            "Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
                            csets=csets_to_del)

                        if len(existing_frontiers) > 0:
                            # This handles files which no longer exist anymore in
                            # the main branch.
                            Log.note(
                                "Deleting existing frontiers for revisions: {{revisions}}",
                                revisions=existing_frontiers)
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN "
                                + quote_set(existing_frontiers))

                        Log.note("Deleting annotations...")
                        t.execute(
                            "DELETE FROM annotations WHERE revision IN " +
                            quote_set(csets_to_del))

                        Log.note("Deleting {{num_entries}} csetLog entries...",
                                 num_entries=len(csets_to_del))
                        t.execute("DELETE FROM csetLog WHERE revision IN " +
                                  quote_set(csets_to_del))

                    # Recalculate the revnums
                    self.recompute_table_revnums()
            except Exception as e:
                Log.warning(
                    "Unexpected error occured while deleting from csetLog:",
                    cause=e)
                Till(seconds=CSET_DELETION_WAIT_TIME).wait()
        return

    def get_old_cset_revnum(self, revision):
        self.csets_todo_backwards.add((revision, True))

        revnum = None
        timeout = Till(seconds=BACKFILL_REVNUM_TIMEOUT)
        while not timeout:
            with self.conn.transaction() as t:
                revnum = self._get_one_revnum(t, revision)

            if revnum and revnum[0] >= 0:
                break
            elif revnum and revnum[0] < 0:
                Log.note("Waiting for table to recompute...")
            else:
                Log.note("Waiting for backfill to complete...")
            Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()

        if timeout:
            Log.error(
                "Cannot find revision {{rev}} after waiting {{timeout}} seconds",
                rev=revision,
                timeout=BACKFILL_REVNUM_TIMEOUT)
        return revnum
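
    # The loop above is a poll-until-timeout shape; reduced to its core
    # (assuming mo_threads semantics: a Till is falsey until it fires;
    # condition_met is an illustrative stand-in):
    #
    #     timeout = Till(seconds=30)
    #     while not timeout:
    #         if condition_met():
    #             break
    #         Till(seconds=1).wait()   # POLL INTERVAL
    #     if timeout:
    #         Log.error("gave up waiting")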

    def get_revnnums_from_range(self, revision1, revision2):
        with self.conn.transaction() as t:
            revnum1 = self._get_one_revnum(t, revision1)
            revnum2 = self._get_one_revnum(t, revision2)
        if not revnum1 or not revnum2:
            did_an_update = self.update_tip()
            if did_an_update:
                with self.conn.transaction() as t:
                    revnum1 = self._get_one_revnum(t, revision1)
                    revnum2 = self._get_one_revnum(t, revision2)

            if not revnum1:
                revnum1 = self.get_old_cset_revnum(revision1)
                # Refresh the second entry
                with self.conn.transaction() as t:
                    revnum2 = self._get_one_revnum(t, revision2)

            if not revnum2:
                revnum2 = self.get_old_cset_revnum(revision2)

                # The first revnum might change also
                with self.conn.transaction() as t:
                    revnum1 = self._get_one_revnum(t, revision1)

        with self.conn.transaction() as t:
            result = self._get_revnum_range(t, revnum1[0], revnum2[0])
        return sorted(result, key=lambda x: int(x[0]))
Example #20
0
    def test_signal_is_not_null(self):
        a = Signal()
        self.assertNotEqual(a, None)
        a.go()
        self.assertNotEqual(a, None)
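
# Why the test above compares against None instead of testing truthiness: in
# mo_threads an un-triggered Signal is falsey yet still a real object, so a
# boolean test and a None test disagree until go() is called (a sketch,
# assuming mo_threads semantics):

from mo_threads import Signal

s = Signal()
assert not s          # NOT YET TRIGGERED, SO FALSEY
assert s is not None  # BUT IT IS STILL AN OBJECT
s.go()
assert s              # TRIGGERED, NOW TRUTHY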
Example #21
0
class Extract(object):
    @override
    def __init__(self, kwargs=None):
        self.settings = kwargs
        self.schema = SnowflakeSchema(self.settings.snowflake)
        self._extract = extract = kwargs.extract

        # SOME PREP
        get_git_revision()

        # VERIFY WE DO NOT HAVE TOO MANY OTHER PROCESSES WORKING ON STUFF
        with MySQL(**kwargs.snowflake.database) as db:
            processes = None
            try:
                processes = jx.filter(
                    db.query("show processlist"), {
                        "and": [{
                            "neq": {
                                "Command": "Sleep"
                            }
                        }, {
                            "neq": {
                                "Info": "show processlist"
                            }
                        }]
                    })
            except Exception as e:
                Log.warning("no database", cause=e)

            if processes:
                if DEBUG:
                    Log.warning("Processes are running\n{{list|json}}",
                                list=processes)
                else:
                    Log.error("Processes are running\n{{list|json}}",
                              list=processes)

        extract.type = listwrap(extract.type)
        extract.start = listwrap(extract.start)
        extract.batch = listwrap(extract.batch)
        extract.field = listwrap(extract.field)
        if any(
                len(extract.type) != len(other)
                for other in [extract.start, extract.batch, extract.field]):
            Log.error(
                "Expecting same number of dimensions for `type`, `start`, `batch`, and `field` in the `extract` inner object"
            )
        for i, t in enumerate(extract.type):
            if t == "time":
                extract.start[i] = Date(extract.start[i])
                extract.batch[i] = Duration(extract.batch[i])
            elif t == "number":
                pass
            else:
                Log.error('Expecting `extract.type` to be "number" or "time"')

        extract.threads = coalesce(extract.threads, 1)
        self.done_pulling = Signal()
        self.queue = Queue("all batches",
                           max=2 * coalesce(extract.threads, 1),
                           silent=True)

        self.bucket = s3.Bucket(self.settings.destination)
        self.notify = aws.Queue(self.settings.notify)
        Thread.run("get records", self.pull_all_remaining)

    def pull_all_remaining(self, please_stop):
        try:
            try:
                content = File(self.settings.extract.last).read_json()
                if len(content) == 1:
                    Log.note("Got a manually generated file {{filename}}",
                             filename=self.settings.extract.last)
                    start_point = tuple(content[0])
                    first_value = [
                        self._extract.start[0] + (start_point[0] * DAY),
                        start_point[1]
                    ]
                else:
                    Log.note("Got a machine generated file {{filename}}",
                             filename=self.settings.extract.last)
                    start_point, first_value = content
                    start_point = tuple(start_point)
                Log.note("First value is {{start1|date}}, {{start2}}",
                         start1=first_value[0],
                         start2=first_value[1])
            except Exception as _:
                Log.error(
                    "Expecting a file {{filename}} with the last good S3 bucket etl id in array form eg: [[954, 0]]",
                    filename=self.settings.extract.last)
                start_point = tuple(self._extract.start)
                first_value = Null

            counter = Counter(start=0)
            # list() IS REQUIRED UNDER PYTHON 3, WHERE zip() RETURNS AN ITERATOR
            for t, s, b, f, i in reversed(
                    list(zip(self._extract.type, self._extract.start,
                             self._extract.batch,
                             listwrap(first_value) + DUMMY_LIST,
                             range(len(self._extract.start))))):
                if t == "time":
                    counter = DurationCounter(start=s,
                                              duration=b,
                                              child=counter)
                    first_value[i] = Date(f)
                else:
                    counter = BatchCounter(start=s, size=b, child=counter)

            batch_size = self._extract.batch.last() * 2 * self.settings.extract.threads
            with MySQL(**self.settings.snowflake.database) as db:
                while not please_stop:
                    sql = self._build_list_sql(db, first_value, batch_size + 1)
                    pending = []
                    counter.reset(start_point)
                    with Timer("Grab a block of ids for processing"):
                        with closing(db.db.cursor()) as cursor:
                            acc = []
                            cursor.execute(sql)
                            count = 0
                            for row in cursor:
                                detail_key = counter.next(row)
                                key = tuple(detail_key[:-1])
                                count += 1
                                if key != start_point:
                                    if first_value:
                                        if not acc:
                                            Log.error(
                                                "not expected, {{filename}} is probably set too far in the past",
                                                filename=self.settings.extract.last)
                                        pending.append({
                                            "start_point": start_point,
                                            "first_value": first_value,
                                            "data": acc
                                        })
                                    acc = []
                                    start_point = key
                                    first_value = row
                                acc.append(row[-1])  # ASSUME LAST COLUMN IS THE FACT TABLE id
                    Log.note("adding {{num}} for processing", num=len(pending))
                    self.queue.extend(pending)

                    if count < batch_size:
                        self.queue.add(THREAD_STOP)
                        break
        except Exception as e:
            Log.warning("Problem pulling data", cause=e)
        finally:
            self.done_pulling.go()
            Log.note("pulling new data is done")

    def _build_list_sql(self, db, first, batch_size):
        # TODO: ENSURE THE LAST COLUMN IS THE id
        if first:
            dim = len(self._extract.field)
            where = SQL_OR.join(
                sql_iso(
                    sql_and(
                        quote_column(f) + ineq(i, e, dim) +
                        db.quote_value(Date(v) if t == "time" else v)
                        for e, (f, v, t) in enumerate(
                            zip(self._extract.field[0:i + 1:], first,
                                self._extract.type[0:i + 1:]))))
                for i in range(dim))
        else:
            where = SQL_TRUE

        selects = []
        for t, f in zip(self._extract.type, self._extract.field):
            if t == "time":
                selects.append(
                    "CAST" +
                    sql_iso(sql_alias(quote_column(f), SQL("DATETIME(6)"))))
            else:
                selects.append(quote_column(f))
        sql = (SQL_SELECT + sql_list(selects) + SQL_FROM +
               self.settings.snowflake.fact_table + SQL_WHERE + where +
               SQL_ORDERBY +
               sql_list(quote_column(f) for f in self._extract.field) +
               SQL_LIMIT + db.quote_value(batch_size))
        return sql

    def extract(self, db, start_point, first_value, data, please_stop):
        Log.note(
            "Starting scan of {{table}} at {{id}} and sending to batch {{start_point}}",
            table=self.settings.snowflake.fact_table,
            id=first_value,
            start_point=start_point)

        id = quote_column(self._extract.field.last())
        ids = (SQL_SELECT + id + SQL_FROM +
               self.settings.snowflake.fact_table + SQL_WHERE + id + " in " +
               sql_iso(sql_list(map(db.quote_value, data))))
        sql = self.schema.get_sql(ids)

        with Timer("Sending SQL"):
            cursor = db.query(sql, stream=True, row_tuples=True)

        extract = self.settings.extract
        fact_table = self.settings.snowflake.fact_table

        with TempFile() as temp_file:
            parent_etl = None
            for s in start_point:
                parent_etl = {"id": s, "source": parent_etl}
            parent_etl["revision"] = get_git_revision()
            parent_etl["machine"] = machine_metadata

            def append(value, i):
                """
                :param value: THE DOCUMENT TO ADD
                :param i: ETL ID FOR THIS DOCUMENT
                :return: None
                """
                temp_file.append(
                    convert.value2json({
                        fact_table: elasticsearch.scrub(value),
                        "etl": {
                            "id": i,
                            "source": parent_etl,
                            "timestamp": Date.now()
                        }
                    }))

            with Timer("assemble data"):
                self.construct_docs(cursor, append, please_stop)

            # WRITE TO S3
            s3_file_name = ".".join(map(text_type, start_point))
            with Timer("write to destination {{filename}}",
                       param={"filename": s3_file_name}):
                if not isinstance(self.settings.destination, text_type):
                    destination = self.bucket.get_key(s3_file_name,
                                                      must_exist=False)
                    destination.write_lines(temp_file)
                else:
                    destination = File(self.settings.destination)
                    destination.write(
                        convert.value2json(
                            [convert.json2value(o) for o in temp_file],
                            pretty=True))
                    return False

        # NOTIFY SQS
        now = Date.now()
        self.notify.add({
            "bucket": self.settings.destination.bucket,
            "key": s3_file_name,
            "timestamp": now.unix,
            "date/time": now.format()
        })

        # SUCCESS!!
        File(extract.last).write(convert.value2json([start_point,
                                                     first_value]))
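
    # ETL-LINEAGE SKETCH: the parent_etl loop above nests one
    # {"id": ..., "source": ...} level per coordinate, innermost first;
    # e.g. a start_point of (954, 0) becomes (illustrative trace):
    #
    #     {"id": 0, "source": {"id": 954, "source": None}}
    #
    # with "revision" and "machine" then attached to the outermost level.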

    def construct_docs(self, cursor, append, please_stop):
        """
        :param cursor: ITERATOR OF RECORDS
        :param append: METHOD TO CALL WITH CONSTRUCTED DOCUMENT
        :return: None (DOCUMENTS ARE EMITTED VIA append)
        """
        null_values = set(self.settings.snowflake.null_values) | {None}

        count = 0
        rownum = 0
        columns = tuple(wrap(c) for c in self.schema.columns)
        with Timer("Downloading from MySQL"):
            curr_record = Null
            for rownum, row in enumerate(cursor):
                if please_stop:
                    Log.error("Got `please_stop` signal")

                nested_path = []
                next_record = None

                for c, value in zip(columns, row):
                    if value in null_values:
                        continue
                    if len(nested_path) < len(c.nested_path):
                        nested_path = unwrap(c.nested_path)
                        next_record = Data()
                    next_record[c.put] = value

                if len(nested_path) > 1:
                    path = nested_path[-2]
                    children = curr_record[path]
                    if children == None:
                        children = curr_record[path] = wrap([])
                    if len(nested_path) > 2:
                        parent_path = path
                        for path in list(reversed(nested_path[0:-2])):
                            parent = children.last()
                            relative_path = relative_field(path, parent_path)
                            children = parent[relative_path]
                            if children == None:
                                children = parent[relative_path] = wrap([])
                            parent_path = path

                    children.append(next_record)
                    continue

                if curr_record == next_record:
                    Log.error("not expected")

                if curr_record:
                    append(curr_record["id"], count)
                    count += 1
                curr_record = next_record

            # DEAL WITH LAST RECORD
            if curr_record:
                append(curr_record["id"], count)
                count += 1

        Log.note("{{num}} documents ({{rownum}} db records)",
                 num=count,
                 rownum=rownum)
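
The grouping in construct_docs is easier to see in isolation. Below is a minimal sketch of the same parent/child assembly using plain dicts and tuples instead of mo-dots wrappers; the two-column row layout and the field names are hypothetical.

def assemble(rows):
    # rows ARRIVE SORTED BY PARENT ID, AS THE ORDERED SQL SCAN GUARANTEES
    docs = []
    curr = None
    for parent_id, child_value in rows:
        if child_value is not None and curr and curr["id"] == parent_id:
            curr["children"].append(child_value)  # SAME PARENT, EXTEND NESTED LIST
            continue
        if curr:
            docs.append(curr)  # EMIT FINISHED PARENT
        children = [] if child_value is None else [child_value]
        curr = {"id": parent_id, "children": children}
    if curr:
        docs.append(curr)  # DEAL WITH LAST RECORD
    return docs

print(assemble([(1, "a"), (1, "b"), (2, None), (3, "c")]))
# [{'id': 1, 'children': ['a', 'b']}, {'id': 2, 'children': []}, {'id': 3, 'children': ['c']}]
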
class Python(object):
    def __init__(self, name, config):
        config = to_data(config)
        if config.debug.logs:
            Log.error("not allowed to configure logging on other process")

        Log.note("begin process")
        # WINDOWS REQUIRED shell, WHILE LINUX NOT
        shell = "windows" in platform.system().lower()
        self.process = Process(
            name, [PYTHON, "-u", "mo_threads" + os.sep + "python_worker.py"],
            debug=False,
            cwd=os.getcwd(),
            shell=shell)
        self.process.stdin.add(
            value2json(set_default({}, config, {"debug": {
                "trace": True
            }})))
        status = self.process.stdout.pop()
        if status != '{"out":"ok"}':
            Log.error("could not start python\n{{error|indent}}",
                      error=self.process.stderr.pop_all() + [status] +
                      self.process.stdin.pop_all())
        self.lock = Lock("wait for response from " + name)
        self.current_task = DONE
        self.current_response = None
        self.current_error = None

        self.daemon = Thread.run("", self._daemon)
        self.errors = Thread.run("", self._stderr)

    def _execute(self, command):
        with self.lock:
            self.current_task.wait()
            self.current_task = Signal()
            self.current_response = None
            self.current_error = None

            if self.process.service_stopped:
                Log.error("python is not running")
            self.process.stdin.add(value2json(command))
            (self.current_task | self.process.service_stopped).wait()

            try:
                if self.current_error:
                    Log.error("problem with process call",
                              cause=Except.new_instance(self.current_error))
                else:
                    return self.current_response
            finally:
                self.current_task = DONE
                self.current_response = None
                self.current_error = None

    def _daemon(self, please_stop):
        while not please_stop:
            line = self.process.stdout.pop(till=please_stop)
            if line == THREAD_STOP:
                break
            try:
                data = json2value(line)
                if "log" in data:
                    Log.main_log.write(*data.log)
                elif "out" in data:
                    self.current_response = data.out
                    self.current_task.go()
                elif "err" in data:
                    self.current_error = data.err
                    self.current_task.go()
            except Exception as e:
                Log.note("non-json line: {{line}}", line=line)
        DEBUG and Log.note("stdout reader is done")

    def _stderr(self, please_stop):
        while not please_stop:
            try:
                line = self.process.stderr.pop(till=please_stop)
                if line == THREAD_STOP:
                    please_stop.go()
                    break
                Log.note("Error line from {{name}}({{pid}}): {{line}}",
                         line=line,
                         name=self.process.name,
                         pid=self.process.pid)
            except Exception as e:
                Log.error("could not process line", cause=e)

    def import_module(self, module_name, var_names=None):
        if var_names is None:
            self._execute({"import": module_name})
        else:
            self._execute({"import": {"from": module_name, "vars": var_names}})

    def set(self, var_name, value):
        self._execute({"set": {var_name, value}})

    def get(self, var_name):
        return self._execute({"get": var_name})

    def execute_script(self, script):
        return self._execute({"exec": script})

    def __getattr__(self, item):
        def output(*args, **kwargs):
            if args:
                if kwargs:
                    Log.error("Not allowed to use both args and kwargs")
                return self._execute({item: args})
            else:
                return self._execute({item: kwargs})

        return output

    def stop(self):
        self._execute({"stop": {}})
        self.process.join()
        self.daemon.stop()
        self.errors.stop()
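
A minimal usage sketch for the worker above, assuming the mo_threads worker script is on disk and an empty (hypothetical) config is acceptable; any attribute not defined on the class is proxied to the subprocess through __getattr__.

worker = Python("example", {})
worker.import_module("math")           # SENDS {"import": "math"}
worker.set("x", 42)                    # SENDS {"set": {"x": 42}}
print(worker.get("x"))                 # SENDS {"get": "x"} AND RETURNS THE RESPONSE
print(worker.execute_script("x + 1"))  # SENDS {"exec": "x + 1"}
worker.sqrt(16)                        # UNDEFINED ATTRIBUTE -> {"sqrt": (16,)} VIA __getattr__
worker.stop()                          # SENDS {"stop": {}}, JOINS PROCESS AND READER THREADS
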
Example #23
0
class SpotManager(object):
    @override
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(
            region_name=kwargs.aws.region,
            aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key)
        )
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.done_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(("id",))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(Duration(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Duration("5minute"))
        self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()

    def update_spot_requests(self, utility_required):
        spot_requests = self._get_managed_spot_requests()

        # ADD UP THE CURRENT REQUESTED INSTANCES
        all_instances = UniqueIndex("id", data=self._get_managed_instances())
        self.active = active = wrap([r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN])

        for a in active.copy():
            if a.status.code == "request-canceled-and-instance-running" and all_instances[a.instance_id] == None:
                active.remove(a)

        used_budget = 0
        current_spending = 0
        for a in active:
            about = self.price_lookup[a.launch_specification.instance_type, a.launch_specification.placement]
            discount = coalesce(about.type.discount, 0)
            Log.note(
                "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
                id=a.id,
                type=a.launch_specification.instance_type,
                zone=a.launch_specification.placement,
                instance_id=a.instance_id,
                price=a.price - discount
            )
            used_budget += a.price - discount
            current_spending += coalesce(about.current_price, a.price) - discount

        Log.note(
            "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
            budget=used_budget,
            current=current_spending
        )

        remaining_budget = self.settings.budget - used_budget

        current_utility = coalesce(SUM(self.price_lookup[r.launch_specification.instance_type, r.launch_specification.placement].type.utility for r in active), 0)
        net_new_utility = utility_required - current_utility

        Log.note("have {{current_utility}} utility running; need {{need_utility}} more utility", current_utility=current_utility, need_utility=net_new_utility)

        if remaining_budget < 0:
            remaining_budget, net_new_utility = self.save_money(remaining_budget, net_new_utility)

        if net_new_utility < 0:
            if self.settings.allowed_overage:
                net_new_utility = Math.min(net_new_utility + self.settings.allowed_overage * utility_required, 0)

            net_new_utility = self.remove_instances(net_new_utility)

        if net_new_utility > 0:
            net_new_utility = Math.min(net_new_utility, self.settings.max_new_utility)
            net_new_utility, remaining_budget = self.add_instances(net_new_utility, remaining_budget)

        if net_new_utility > 0:
            Log.alert(
                "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour).  Remaining budget is ${{budget|round(decimal=2)}} ",
                num=net_new_utility,
                expected=self.settings.max_utility_price,
                budget=remaining_budget
            )

        # Give EC2 a chance to notice the new requests before tagging them.
        Till(seconds=3).wait()
        with self.net_new_locker:
            for req in self.net_new_spot_requests:
                req.add_tag("Name", self.settings.ec2.instance.name)

        Log.note("All requests for new utility have been made")
        self.done_spot_requests.go()

    def add_instances(self, net_new_utility, remaining_budget):
        prices = self.pricing()

        for p in prices:
            if net_new_utility <= 0 or remaining_budget <= 0:
                break

            if p.current_price == None:
                Log.note("{{type}} has no current price",
                    type=p.type.instance_type
                )
                continue

            if self.settings.utility[p.type.instance_type].blacklist or \
                    p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones):
                Log.note("{{type}} in {{zone}} skipped due to blacklist", type=p.type.instance_type, zone=p.availability_zone)
                continue

            # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
            max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
            max_bid = Math.min(p.higher_price, max_acceptable_price, remaining_budget)
            min_bid = p.price_80

            if min_bid > max_acceptable_price:
                Log.note(
                    "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                    type=p.type.instance_type,
                    price=min_bid,
                    remaining=max_acceptable_price
                )
                continue
            elif min_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                    type=p.type.instance_type,
                    bid=min_bid,
                    remaining_budget=remaining_budget
                )
                continue
            elif min_bid > max_bid:
                Log.error("not expected")

            naive_number_needed = int(Math.round(float(net_new_utility) / float(p.type.utility), decimal=0))
            limit_total = None
            if self.settings.max_percent_per_type < 1:
                current_count = sum(1 for a in self.active if a.launch_specification.instance_type == p.type.instance_type and a.launch_specification.placement == p.availability_zone)
                all_count = sum(1 for a in self.active if a.launch_specification.placement == p.availability_zone)
                all_count = max(all_count, naive_number_needed)
                limit_total = int(Math.floor((all_count * self.settings.max_percent_per_type - current_count) / (1 - self.settings.max_percent_per_type)))

            num = Math.min(naive_number_needed, limit_total, self.settings.max_requests_per_type)
            if num < 0:
                Log.note(
                    "{{type}} is over {{limit|percent}} of instances, no more requested",
                    limit=self.settings.max_percent_per_type,
                    type=p.type.instance_type
                )
                continue
            elif num == 1:
                min_bid = Math.min(Math.max(p.current_price * 1.1, min_bid), max_acceptable_price)
                price_interval = 0
            else:
                price_interval = Math.min(min_bid / 10, (max_bid - min_bid) / (num - 1))

            for i in range(num):
                bid_per_machine = min_bid + (i * price_interval)
                if bid_per_machine < p.current_price:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        current_price=p.current_price
                    )
                    continue
                if bid_per_machine - p.type.discount > remaining_budget:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        remaining=remaining_budget
                    )
                    continue

                try:
                    if self.settings.ec2.request.count != 1:
                        Log.error("Spot Manager can only request machines one at a time")

                    new_requests = self._request_spot_instances(
                        price=bid_per_machine,
                        availability_zone_group=p.availability_zone,
                        instance_type=p.type.instance_type,
                        kwargs=copy(self.settings.ec2.request)
                    )
                    Log.note(
                        "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                        num=len(new_requests),
                        type=p.type.instance_type,
                        zone=p.availability_zone,
                        utility=p.type.utility,
                        price=bid_per_machine
                    )
                    net_new_utility -= p.type.utility * len(new_requests)
                    remaining_budget -= (bid_per_machine - p.type.discount) * len(new_requests)
                    with self.net_new_locker:
                        for ii in new_requests:
                            self.net_new_spot_requests.add(ii)
                except Exception as e:
                    Log.warning(
                        "Request instance {{type}} failed because {{reason}}",
                        type=p.type.instance_type,
                        reason=e.message,
                        cause=e
                    )

                    if "Max spot instance count exceeded" in e.message:
                        Log.note("No further spot requests will be attempted.")
                        return net_new_utility, remaining_budget

        return net_new_utility, remaining_budget

    def remove_instances(self, net_new_utility):
        instances = self.running_instances()

        # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE
        remove_list = []
        for acceptable_error in range(0, 8):
            remaining_utility = -net_new_utility
            remove_list = FlatList()
            for s in instances:
                utility = coalesce(s.markup.type.utility, 0)
                if utility <= remaining_utility + acceptable_error:
                    remove_list.append(s)
                    remaining_utility -= utility
            if remaining_utility <= 0:
                net_new_utility = -remaining_utility
                break

        if not remove_list:
            return net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.note("Shutdown {{instances}}", instances=remove_list.id)
        for i in remove_list:
            try:
                self.instance_manager.teardown(i)
            except Exception as e:
                Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

        remove_spot_requests = remove_list.spot_instance_request_id

        # TERMINATE INSTANCES
        self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)

        return net_new_utility

    def running_instances(self):
        # FIND THE BIGGEST, MOST EXPENSIVE REQUESTS
        instances = self._get_managed_instances()
        for r in instances:
            try:
                r.markup = self.price_lookup[r.instance_type, r.placement]
            except Exception as e:
                Log.error("No pricing!!!", cause=e)
        instances = jx.sort(instances, [
            {"value": "markup.type.utility", "sort": -1},
            {"value": "markup.estimated_value", "sort": 1}
        ])
        return instances

    def save_money(self, remaining_budget, net_new_utility):
        remove_spot_requests = wrap([])

        # FIRST CANCEL THE PENDING REQUESTS
        if remaining_budget < 0:
            requests = self._get_managed_spot_requests()
            for r in requests:
                if r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN:
                    remove_spot_requests.append(r.id)
                    net_new_utility += self.settings.utility[r.launch_specification.instance_type].utility
                    remaining_budget += r.price

        instances = jx.sort(self.running_instances(), "markup.estimated_value")

        remove_list = wrap([])
        for s in instances:
            if remaining_budget >= 0:
                break
            remove_list.append(s)
            net_new_utility += coalesce(s.markup.type.utility, 0)
            remaining_budget += coalesce(s.request.bid_price, s.markup.price_80, s.markup.current_price)

        if not remove_list:
            return remaining_budget, net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.warning("Shutdown {{instances}} to save money!", instances=remove_list.id)
        for i in remove_list:
            try:
                self.instance_manager.teardown(i)
            except Exception as e:
                Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

        remove_spot_requests.extend(remove_list.spot_instance_request_id)

        # TERMINATE INSTANCES
        self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)
        return remaining_budget, net_new_utility

    @cache(duration=5 * SECOND)
    def _get_managed_spot_requests(self):
        output = wrap([datawrap(r) for r in self.ec2_conn.get_all_spot_instance_requests() if not r.tags.get("Name") or r.tags.get("Name").startswith(self.settings.ec2.instance.name)])
        return output

    def _get_managed_instances(self):
        requests = UniqueIndex(["instance_id"], data=self._get_managed_spot_requests().filter(lambda r: r.instance_id!=None))
        reservations = self.ec2_conn.get_all_instances()

        output = []
        for res in reservations:
            for instance in res.instances:
                if instance.tags.get('Name', '').startswith(self.settings.ec2.instance.name) and instance._state.name == "running":
                    instance.request = requests[instance.id]
                    output.append(datawrap(instance))
        return wrap(output)

    def _start_life_cycle_watcher(self):
        def life_cycle_watcher(please_stop):
            failed_attempts = Data()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                last_get = Date.now()
                instances = wrap({i.id: i for r in self.ec2_conn.get_all_instances() for i in r.instances})
                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r) for i, r in [(instances[r.instance_id], r) for r in spot_requests]
                    if i.id and not i.tags.get("Name") and i._state.name == "running" and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]
                for i, r in please_setup:
                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            try:
                                self.ec2_conn.terminate_instances(instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error("Can not setup unknown {{instance_id}} of type {{type}}", instance_id=i.id, type=i.instance_type)
                        i.markup = p
                        try:
                            self.instance_manager.setup(i, coalesce(p, 0))
                        except Exception as e:
                            e = Except.wrap(e)
                            failed_attempts[r.id] += [e]
                            Log.error(ERROR_ON_CALL_TO_SETUP, e)
                        i.add_tag("Name", self.settings.ec2.instance.name + " (running)")
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                    except Exception as e:
                        if not time_to_stop_trying.get(i.id):
                            time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                        if Date.now() > time_to_stop_trying[i.id]:
                            # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                            self.ec2_conn.terminate_instances(instance_ids=[i.id])
                            with self.net_new_locker:
                                self.net_new_spot_requests.remove(r.id)
                            Log.warning("Problem with setup of {{instance_id}}.  Time is up.  Instance TERMINATED!", instance_id=i.id, cause=e)
                        elif "Can not setup unknown " in e:
                            Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)
                        elif ERROR_ON_CALL_TO_SETUP in e:
                            if len(failed_attempts[r.id]) > 2:
                                Log.warning("Problem with setup() of {{instance_id}}", instance_id=i.id, cause=failed_attempts[r.id])
                        else:
                            Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES])
                give_up = wrap([r for r in spot_requests if r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES])
                ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                # SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)
                        for g in give_up:
                            self.net_new_spot_requests.remove(g.id)
                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id",), data=pending)
                        pending = pending | self.net_new_spot_requests

                    if give_up:
                        self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id)
                        Log.note("Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code)

                if not pending and not time_to_stop_trying and self.done_spot_requests:
                    Log.note("No more pending spot requests")
                    please_stop.go()
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            Log.note("life cycle watcher has stopped")

        # Log.warning("lifecycle watcher is disabled")
        timeout = Till(seconds=self.settings.run_interval.seconds - 60)
        self.watcher = Thread.run("lifecycle watcher", life_cycle_watcher, please_stop=timeout)

    def _get_valid_availability_zones(self):
        subnets = list(self.vpc_conn.get_all_subnets(subnet_ids=self.settings.ec2.request.network_interfaces.subnet_id))
        zones_with_interfaces = [s.availability_zone for s in subnets]

        if self.settings.availability_zone:
            # If they pass a list of zones, constrain it by zones we have an
            # interface for.
            return set(zones_with_interfaces) & set(listwrap(self.settings.availability_zone))
        else:
            # Otherwise, use all available zones.
            return zones_with_interfaces

    @override
    def _request_spot_instances(self, price, availability_zone_group, instance_type, kwargs):
        kwargs.kwargs = None

        # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP
        if instance_type.startswith("m3."):
            kwargs.placement_group = None

        kwargs.network_interfaces = NetworkInterfaceCollection(*(
            NetworkInterfaceSpecification(**i)
            for i in listwrap(kwargs.network_interfaces)
            if self.vpc_conn.get_all_subnets(subnet_ids=i.subnet_id, filters={"availabilityZone": availability_zone_group})
        ))

        if len(kwargs.network_interfaces) == 0:
            Log.error("No network interface specifications found for {{availability_zone}}!", availability_zone=kwargs.availability_zone_group)

        block_device_map = BlockDeviceMapping()

        # GENERIC BLOCK DEVICE MAPPING
        for dev, dev_settings in kwargs.block_device_map.items():
            block_device_map[dev] = BlockDeviceType(
                delete_on_termination=True,
                **dev_settings
            )

        kwargs.block_device_map = block_device_map

        # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping
        num_ephemeral_volumes = ephemeral_storage[instance_type]["num"]
        for i in range(num_ephemeral_volumes):
            letter = convert.ascii2char(98 + i)  # START AT "b"
            kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType(
                ephemeral_name='ephemeral' + text_type(i),
                delete_on_termination=True
            )

        if kwargs.expiration:
            kwargs.valid_until = (Date.now() + Duration(kwargs.expiration)).format(ISO8601)
            kwargs.expiration = None

        # ATTACH NEW EBS VOLUMES
        for i, drive in enumerate(self.settings.utility[instance_type].drives):
            letter = convert.ascii2char(98 + i + num_ephemeral_volumes)
            device = drive.device = coalesce(drive.device, "/dev/sd" + letter)
            d = drive.copy()
            d.path = None  # path AND device PROPERTY IS NOT ALLOWED IN THE BlockDeviceType
            d.device = None
            if d.size:
                kwargs.block_device_map[device] = BlockDeviceType(
                    delete_on_termination=True,
                    **d
                )

        output = list(self.ec2_conn.request_spot_instances(**kwargs))
        return output

    def pricing(self):
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()

            now = Date.now()
            expressions.ALLOW_SCRIPTING = True

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from": prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {"coalesce": [{"rows": {"timestamp": 1}}, {"date": "eod"}]},
                                "edges": ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {"sub": {"timestamp": self.settings.uptime.duration.seconds}}
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone",
                        "instance_type",
                        {
                            "name": "time",
                            "range": {"min": "effective", "max": "expire", "mode": "inclusive"},
                            "allowNulls": False,
                            "domain": {"type": "time", "min": now.floor(HOUR) - self.settings.uptime.history, "max": Date.now().floor(HOUR)+HOUR, "interval": "hour"}
                        }
                    ],
                    "select": [
                        {"value": "price", "aggregate": "max"},
                        {"aggregate": "count"}
                    ],
                    "where": {"gt": {"expire": now.floor(HOUR) - self.settings.uptime.history}},
                    "window": [
                        {
                            "name": "current_price",
                            "value": "rows.last.price",
                            "edges": ["availability_zone", "instance_type"],
                            "sort": "time"
                        }
                    ]
                }).data

                bid80 = jx.run({
                    "from": hourly_pricing,
                    "edges": [
                        {
                            "value": "availability_zone",
                            "allowNulls": False
                        },
                        {
                            "name": "type",
                            "value": "instance_type",
                            "allowNulls": False,
                            "domain": {"type": "set", "key": "instance_type", "partitions": self.settings.utility}
                        }
                    ],
                    "select": [
                        {"name": "price_80", "value": "price", "aggregate": "percentile", "percentile": self.settings.uptime.bid_percentile},
                        {"name": "max_price", "value": "price", "aggregate": "max"},
                        {"aggregate": "count"},
                        {"value": "current_price", "aggregate": "one"},
                        {"name": "all_price", "value": "price", "aggregate": "list"}
                    ],
                    "window": [
                        {"name": "estimated_value", "value": {"div": ["type.utility", "price_80"]}},
                        {"name": "higher_price", "value": lambda row, rownum, rows: find_higher(row.all_price, row.price_80)}  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.run({
                    "from": bid80,
                    "sort": {"value": "estimated_value", "sort": -1}
                })

                self.prices = wrap(output.data)
                self.price_lookup = UniqueIndex(("type.instance_type", "availability_zone"), data=self.prices)
            return self.prices

    def _get_spot_prices_from_aws(self):
        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content, flexible=False, leaves=False)
            except Exception as e:
                cache = FlatList()

        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {"value": "timestamp", "aggregate": "max"}
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX([Date(most_recent), Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note("get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at
                        )

                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(self.settings.product, "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token
                        )
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(wrap({
                                "availability_zone": p.availability_zone,
                                "instance_type": p.instance_type,
                                "price": p.price,
                                "product_description": p.product_description,
                                "region": p.region.name,
                                "timestamp": Date(p.timestamp).unix
                            }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(prices, {"gte": {"timestamp": {"date": "today-2day"}}})
            def stream():  # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                yield "]"
            File(self.settings.price_file).write(stream())

        return prices
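
The bid ladder in add_instances above is easiest to see with concrete numbers. A worked sketch, with hypothetical prices: num bids climb from min_bid in steps of price_interval, and the cap on the step size keeps the top rung at or below max_bid.

# HYPOTHETICAL PRICES: 80th-PERCENTILE BID, CEILING, AND NUMBER OF REQUESTS
min_bid, max_bid, num = 0.080, 0.120, 5
price_interval = min(min_bid / 10, (max_bid - min_bid) / (num - 1))  # min(0.008, 0.010) = 0.008
bids = [round(min_bid + i * price_interval, 4) for i in range(num)]
print(bids)  # [0.08, 0.088, 0.096, 0.104, 0.112] -- NEVER EXCEEDS max_bid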