Example #1
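Batches SQL inserts: after filtering out revisions already present, jx.chunk splits the insert list into groups of SQL_CSET_BATCH_SIZE rows, one INSERT statement per group.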
    def add_cset_entries(self,
                         ordered_rev_list,
                         timestamp=False,
                         number_forward=True):
        '''
        Add a list of revisions to the table. Assumes ordered_rev_list is
        ordered by how the changesets are found in the changelog; going
        forwards or backwards is handled by flipping the list.
        :param ordered_rev_list: Order given from changeset log searching.
        :param timestamp: If False, records are kept indefinitely,
                          but if holes exist: (delete, None, delete, None),
                          those deletes with Nones around them
                          will not be deleted.
        :param number_forward: If True, this function numbers the revision
                               list going forward from max(revNum); else it
                               goes backwards from revNum, then adds X to all
                               revnums and self.next_revnum, where X is the
                               length of ordered_rev_list.
        :return:
        '''
        with self.conn.transaction() as t:
            current_min = t.get_one("SELECT min(revnum) FROM csetlog")[0]
            current_max = t.get_one("SELECT max(revnum) FROM csetlog")[0]
            if current_min is None or current_max is None:  # EMPTY TABLE
                current_min = 0
                current_max = 0

            direction = -1
            start = current_min - 1
            if number_forward:
                direction = 1
                start = current_max + 1
                ordered_rev_list = ordered_rev_list[::-1]

            insert_list = [(start + direction * count, rev,
                            int(time.time()) if timestamp else -1)
                           for count, rev in enumerate(ordered_rev_list)]

            # In case of overlapping requests
            fmt_insert_list = []
            for cset_entry in insert_list:
                tmp = self._get_one_revision(t, cset_entry)
                if not tmp:
                    fmt_insert_list.append(cset_entry)

            for _, tmp_insert_list in jx.chunk(fmt_insert_list,
                                               size=SQL_CSET_BATCH_SIZE):
                t.execute(
                    "INSERT INTO csetLog (revnum, revision, timestamp)" +
                    " VALUES " + sql_list(
                        quote_set((revnum, revision, timestamp))
                        for revnum, revision, timestamp in tmp_insert_list))

            # Move the revision numbers forward if needed
            self.recompute_table_revnums()

        # Start a maintenance run if needed
        if self.check_for_maintenance():
            self.maintenance_signal.go()
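A worked illustration of the numbering above, with hypothetical values (not from the source): given number_forward=True and current_max=10, three changelog revisions get revnums 11, 12, 13 after the list is flipped.

ordered_rev_list = ["c3", "c2", "c1"]      # newest-first, as found in the changelog
ordered_rev_list = ordered_rev_list[::-1]  # flipped -> ["c1", "c2", "c3"]
start, direction = 10 + 1, 1               # current_max + 1, going forward
insert_list = [(start + direction * count, rev, -1)  # -1: no timestamp
               for count, rev in enumerate(ordered_rev_list)]
assert insert_list == [(11, "c1", -1), (12, "c2", -1), (13, "c3", -1)]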
Example #2
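A unit test that pins down jx.chunk's behavior on empty, undersized, exactly-sized, and oversized inputs.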
    def test_chunk(self):
        data = []
        for g, d in jx.chunk(data, size=5):
            assert False

        data = [1, 2, 3]
        for g, d in jx.chunk(data, size=5):
            assert d == [1, 2, 3]

        data = [1, 2, 3, 4, 5]
        for g, d in jx.chunk(data, size=5):
            assert d == [1, 2, 3, 4, 5]

        data = [1, 2, 3, 4, 5, 6]
        for g, d in jx.chunk(data, size=5):
            assert d == [1, 2, 3, 4, 5] or d == [6]

        data = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        for g, d in jx.chunk(data, size=5):
            assert d == [1, 2, 3, 4, 5] or d == [6, 7, 8, 9]
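The test above fixes the contract: jx.chunk(data, size) yields (group_index, sub_list) pairs, where each sub_list holds up to size consecutive items and an empty input yields nothing. A minimal sketch of that contract, assuming a plain list input (the real jx-python implementation also accepts generators and other iterables):

def chunk(data, size):
    # Sketch only: yield (group_index, sublist) pairs over a list,
    # each sublist holding up to `size` consecutive items.
    for g in range((len(data) + size - 1) // size):
        yield g, data[g * size:(g + 1) * size]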
Example #3
    def save_money(self, remaining_budget, net_new_utility):
        remove_spot_requests = wrap([])

        # FIRST CANCEL THE PENDING REQUESTS
        if remaining_budget < 0:
            requests = self._get_managed_spot_requests()
            for r in requests:
                if r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN:
                    remove_spot_requests.append(r.id)
                    net_new_utility += self.settings.utility[
                        r.launch_specification.instance_type].utility
                    remaining_budget += r.price

        instances = jx.sort(self.running_instances(), "markup.estimated_value")

        remove_list = wrap([])
        for s in instances:
            if remaining_budget >= 0:
                break
            remove_list.append(s)
            net_new_utility += coalesce(s.markup.type.utility, 0)
            remaining_budget += coalesce(s.request.bid_price,
                                         s.markup.price_80,
                                         s.markup.current_price)

        if not remove_list:
            return remaining_budget, net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.warning("Shutdown {{instances}} to save money!",
                    instances=remove_list.id)
        if ALLOW_SHUTDOWN:
            for g, removals in jx.chunk(remove_list, size=20):
                for i, t in [(i,
                              Thread.run("teardown " + i.id,
                                         self.instance_manager.teardown,
                                         i,
                                         please_stop=False))
                             for i in removals]:
                    try:
                        t.join()
                    except Exception:
                        Log.note("Problem with shutdown of {{id}}", id=i.id)

            remove_spot_requests.extend(remove_list.spot_instance_request_id)

            # TERMINATE INSTANCES
            self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

            # TERMINATE SPOT REQUESTS
            self.ec2_conn.cancel_spot_instance_requests(
                request_ids=remove_spot_requests)
        return remaining_budget, net_new_utility
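In this example jx.chunk bounds concurrency: teardown threads are started for at most 20 instances at a time, and each batch is joined before the next one starts.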
Example #4
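Drains a backlog of SQL statements: jx.chunk groups them into blocks of MAX_BATCH_SIZE, and each block is joined with semicolons and executed as one batch.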
    def _execute_backlog(self):
        if not self.backlog: return

        backlog, self.backlog = self.backlog, []
        for i, g in jx.chunk(backlog, size=MAX_BATCH_SIZE):
            sql = self.preamble + ";\n".join(g)
            try:
                self.debug and Log.note("Execute block of SQL:\n{{sql|indent}}", sql=sql)
                self.cursor.execute(sql)
                self.cursor.close()
                self.cursor = self.db.cursor()
            except Exception as e:
                Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
Example #5
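Hashes a source in chunks of chunk_size bytes; multi-chunk results follow the S3 multipart-ETag convention (md5 of the concatenated per-chunk digests, plus a "-<count>" suffix).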
def md5(source, chunk_size=CHUNK_SIZE):
    # Python 2 code: .encode("hex") and unicode() do not exist in Python 3
    md5s = []
    for g, data in jx.chunk(source.read_bytes(), size=chunk_size):
        md5s.append(hashlib.md5(data).digest())

    if len(md5s) == 0:
        # md5 of zero bytes, quoted like an S3 ETag
        return '"d41d8cd98f00b204e9800998ecf8427e"'
    elif len(md5s) == 1:
        return quote(md5s[0].encode("hex"))
    else:
        # S3 multipart-ETag convention: md5 of the concatenated chunk
        # digests, suffixed with "-" and the number of chunks
        Log.warning("not known to work")
        new_md5 = hashlib.md5(b"".join(md5s))
        return unicode(new_md5.hexdigest() + b"-" + str(len(md5s)))
Example #6
    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                messages = wrap(self.queue.pop_all())
                if not messages:
                    Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                    continue

                for g, mm in jx.chunk(messages, size=self.batch_size):
                    scrubbed = []
                    for i, message in enumerate(mm):
                        if message is THREAD_STOP:
                            please_stop.go()
                            continue
                        try:
                            chain = flatten_causal_chain(message.value)
                            scrubbed.append(
                                {
                                    "value": [
                                        _deep_json_to_string(link, depth=3)
                                        for link in chain
                                    ]
                                }
                            )
                        except Exception as e:
                            Log.warning("Problem adding to scrubbed list", cause=e)

                    self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as f:
                Log.warning("Problem inserting logs into ES", cause=f)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index,
                    )
                    break
                Till(seconds=PAUSE_AFTER_BAD_INSERT).wait()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)
Example #7
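Decodes an escaped ApiName: after splitting on underscores, jx.chunk(parts[1:], 2) walks the remaining parts in (hex code, literal) pairs.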
def unescape_name(esc_name):
    if not isinstance(esc_name, ApiName):
        Log.error("expecting an api name")
    if len(esc_name.values) > 1:
        Log.error("do not knwo how to handle")
    try:
        parts = text(esc_name).split("_")
        result = parts[:1]
        for i, (p, q) in jx.chunk(parts[1:], 2):
            if len(p) == 0:
                result.append("_")
            else:
                result.append(hex2chr(p))
            result.append(q)
        name = "".join(result)
        return name
    except Exception:
        return esc_name.values[0]
Example #8
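Consolidates BigQuery table shards into a primary shard: shards whose schema already matches the merged snowflake are appended with UNION ALL in groups of MAX_MERGE (via jx.chunk); the rest are copied one at a time with per-column SELECTs, and the view is then recreated over the primary shard.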
    def merge_shards(self):
        shards = []
        tables = list(self.container.client.list_tables(
            self.container.dataset))
        current_view = Null  # VIEW THAT POINTS TO PRIMARY SHARD
        primary_shard_name = None  # PRIMARY SHARD
        api_name = escape_name(self.short_name)

        for table_item in tables:
            table = table_item.reference
            table_api_name = ApiName(table.table_id)
            if text(table_api_name).startswith(text(api_name)):
                if table_api_name == api_name:
                    if table_item.table_type != "VIEW":
                        Log.error("expecting {{table}} to be a view",
                                  table=api_name)
                    current_view = self.container.client.get_table(table)
                    view_sql = current_view.view_query
                    primary_shard_name = _extract_primary_shard_name(view_sql)
                elif SUFFIX_PATTERN.match(
                        text(table_api_name)[len(text(api_name)):]):
                    try:
                        known_table = self.container.client.get_table(table)
                        shards.append(known_table)
                    except Exception as e:
                        Log.warning("could not merge table {{table}}",
                                    table=table,
                                    cause=e)

        if not current_view:
            Log.error("expecting {{table}} to be a view pointing to a table",
                      table=api_name)

        shard_flakes = [
            Snowflake.parse(
                big_query_schema=shard.schema,
                es_index=text(self.container.full_name +
                              ApiName(shard.table_id)),
                top_level_fields=self.top_level_fields,
                partition=self.partition,
            ) for shard in shards
        ]
        total_flake = snowflakes.merge(
            shard_flakes,
            es_index=text(self.full_name),
            top_level_fields=self.top_level_fields,
            partition=self.partition,
        )

        for i, s in enumerate(shards):
            if ApiName(s.table_id) == primary_shard_name:
                if total_flake == shard_flakes[i]:
                    # USE THE CURRENT PRIMARY SHARD AS A DESTINATION
                    del shards[i]
                    del shard_flakes[i]
                    break
        else:
            name = self.short_name + "_" + "".join(Random.sample(ALLOWED, 20))
            primary_shard_name = escape_name(name)
            self.container.create_table(
                table=name,
                schema=total_flake.schema,
                sharded=False,
                read_only=False,
                kwargs=self.config,
            )

        primary_full_name = self.container.full_name + primary_shard_name

        selects = []
        for flake, table in zip(shard_flakes, shards):
            q = ConcatSQL(
                SQL_SELECT,
                JoinSQL(ConcatSQL(SQL_COMMA, SQL_CR),
                        gen_select(total_flake, flake)),
                SQL_FROM,
                quote_column(ApiName(table.dataset_id, table.table_id)),
            )
            selects.append(q)

        Log.note("inserting into table {{table}}",
                 table=text(primary_shard_name))
        matched = []
        unmatched = []
        for sel, shard, flake in zip(selects, shards, shard_flakes):
            if flake == total_flake:
                matched.append((sel, shard, flake))
            else:
                unmatched.append((sel, shard, flake))

        # EVERYTHING THAT IS IDENTICAL TO PRIMARY CAN BE MERGED WITH SIMPLE UNION ALL
        if matched:
            for g, merge_chunk in jx.chunk(matched, MAX_MERGE):
                command = ConcatSQL(
                    SQL_INSERT,
                    quote_column(primary_full_name),
                    JoinSQL(
                        SQL_UNION_ALL,
                        (sql_query({
                            "from":
                            self.container.full_name + ApiName(shard.table_id)
                        }) for _, shard, _ in merge_chunk),
                    ),
                )
                DEBUG and Log.note("{{sql}}", sql=text(command))
                job = self.container.query_and_wait(command)
                Log.note("job {{id}} state = {{state}}",
                         id=job.job_id,
                         state=job.state)

                if job.errors:
                    Log.error(
                        "\n{{sql}}\nDid not fill table:\n{{reason|json|indent}}",
                        sql=command.sql,
                        reason=job.errors,
                    )
                for _, shard, _ in merge_chunk:
                    self.container.client.delete_table(shard)

        # ALL OTHER SCHEMAS MISMATCH
        for s, shard, _ in unmatched:
            try:
                command = ConcatSQL(SQL_INSERT,
                                    quote_column(primary_full_name), s)
                DEBUG and Log.note("{{sql}}", sql=text(command))
                job = self.container.query_and_wait(command)
                Log.note(
                    "from {{shard}}, job {{id}}, state {{state}}",
                    id=job.job_id,
                    shard=shard.table_id,
                    state=job.state,
                )

                if job.errors:
                    if all(" does not have a schema." in m
                           for m in wrap(job.errors).message):
                        pass  # NOTHING TO DO
                    else:
                        Log.error(
                            "\n{{sql}}\nDid not fill table:\n{{reason|json|indent}}",
                            sql=command.sql,
                            reason=job.errors,
                        )

                self.container.client.delete_table(shard)
            except Exception as e:
                Log.warning("failure to merge {{shard}}", shard=shard, cause=e)

        # REMOVE OLD VIEW
        view_full_name = self.container.full_name + api_name
        if current_view:
            self.container.client.delete_table(current_view)

        # CREATE NEW VIEW
        self.container.create_view(view_full_name, primary_full_name)