Exemple #1
0
def test_timer_loop():

    num_sleeps = 100
    sleep_duration = 0.1

    t1 = Timer()
    t2 = Timer()

    t1.start()

    for i in range(0, num_sleeps):
        t2.start()
        time.sleep(sleep_duration)
        t2.stop()

    t1.stop()

    # print("Elapsed time {}".format(t1.elapsed()))
    # print("Elapsed time {}".format(t2.elapsed()))

    expected_elapsed_time = num_sleeps * sleep_duration
    assert expected_elapsed_time - 0.1 < t1.elapsed(
    ) < expected_elapsed_time + 0.1
    assert expected_elapsed_time - 0.1 < t2.elapsed(
    ) < expected_elapsed_time + 0.1
Exemple #2
0
def test_timer_start_stop():

    sleep_duration = 0.1

    t = Timer()

    t.start()
    time.sleep(sleep_duration)
    t.stop()
    time.sleep(sleep_duration)

    # print("Elapsed time {}".format(t.elapsed()))

    expected_elapsed_time = 1 * sleep_duration
    assert expected_elapsed_time - 0.1 < t.elapsed(
    ) < expected_elapsed_time + 0.1
Exemple #3
0
class OpMetrics(object):
    def __init__(self):
        self.__timer = Timer()

    def timer_start(self):
        self.__timer.start()

    def timer_stop(self):
        self.__timer.stop()

    def elapsed_time(self):
        return self.__timer.elapsed()

    def start_time(self):
        return self.__timer.start_time

    def timer_running(self):
        return self.__timer.running

    @staticmethod
    def print_metrics(op_list):
        for op in op_list:
            OpMetrics.pretty_print(op)

    @staticmethod
    def print_overall_metrics(op_list, name=None):
        total_elapsed = 0
        for op in op_list:
            total_elapsed += op.op_metrics.elapsed_time()
        if name is None:
            name = " + ".join([o.name for o in op_list])
        print('{}: {}'.format(total_elapsed, name))

    @staticmethod
    def pretty_print(op):
        print("{}: {}".format(op, op.op_metrics))

    def __repr__(self):
        return {'elapsed_time': round(self.elapsed_time(), 5)}.__repr__()
Exemple #4
0
class QueryPlan(object):
    """Container for the operators in a query. Really a convenience class that allows the plan graph to be
    generated and the operator execution metrics.

    """

    def __init__(self, system=None, operators=None, is_async=False, buffer_size=1024, use_shared_mem=False):
        # type: (WorkerSystem, list, bool, int, bool) -> None
        """

        :param operators:
        """

        self.system = system
        self.use_shared_mem = use_shared_mem

        self.__timer = Timer()
        self.total_elapsed_time = 0.0
        self.__debug_timer = OrderedDict() 
        self.completion_counter = 0

        if operators is None:
            self.operators = OrderedDict()
        else:
            self.operators = OrderedDict()
            for o in operators:
                self.operators[o.name] = o

        self.is_async = is_async
        self.queue = Queue()

        self.buffer_size = buffer_size
        # Cost related metrics
        self.sql_scanned_bytes = 0
        self.sql_returned_bytes = 0
        self.returned_bytes = 0
        self.num_http_get_requests = 0

    def get_operator(self, name):
        return self.operators[name]

    def add_operator(self, operator):
        """Adds the operator to the list of operators in the plan. This method ensures the operators are sorted by
        operator name - which is important only for retrieving the root operators.

        :param operator:
        :return:
        """

        if operator.name in self.operators:
            raise Exception("Cannot add multiple operators with same name. "
                            "Operator '{}' already added".format(operator.name))

        operator.set_query_plan(self)
        operator.set_buffer_size(self.buffer_size)

        self.operators[operator.name] = operator
        sorted_operators = sorted(self.operators.values(), key=lambda o: o.name)
        self.operators = OrderedDict((o.name, o) for o in sorted_operators)

        return operator

    def traverse_depth_first(self, from_operator, visited_operators):
        """Returns a sorted list of the operators according to where they appear in the query plan using depth first
        traversal, starting from the supplied operator.

        :param from_operator: First operator to start from
        :param visited_operators: Operators already traversed
        :return: Sorted list of operators
        """

        if from_operator.name not in visited_operators:
            visited_operators[from_operator.name] = from_operator

        for c in from_operator.consumers:
            visited_operators = self.traverse_depth_first(c, visited_operators)

        return visited_operators

    @staticmethod
    def traverse_breadth_first(from_operators):
        """Returns a sorted list of the operators according to where they appear in the query plan using breadth
        first traversal, starting from the supplied operators.

        :param from_operators: The operators to start from
        :return: Sorted list of operators
        """

        visited_operators = OrderedDict()
        operators_list = deque(from_operators)
        while operators_list:
            o = operators_list.popleft()
            if o.name not in visited_operators:
                visited_operators[o.name] = o

                # Add the consumers except any visited
                for c in o.consumers:
                    if c.name not in visited_operators:
                        operators_list.append(c)

        return visited_operators.values()

    def traverse_topological(self):
        """This method sorts the operators topgraphically and by operator name. This is to ensure
        that the operator metrics are written in the order that most closely aligns with the plan. Note that this
        is not the same order that operators execute in, which may be counterintuitive - e.g. A top operator can
        complete before a scan.

        :return: Sorted list of operators
        """

        g = networkx.DiGraph()

        for o in self.operators.values():
            g.add_node(o.name, value=o)

        for o in self.operators.values():
            for c in o.consumers:
                g.add_edge(o.name, c.name)

        sorted_operators = list(map(lambda name: g.nodes[name]['value'], networkx.topological_sort(g)))

        return sorted_operators

    def traverse_depth_first_from_root(self):
        """Returns a sorted list of the operators according to where they appear in the query plan, starting from
        the root operators.

        :return: Sorted list of operators
        """

        # Find the operators at the root of the graph, they will have no producers
        root_operators = self.find_root_operators()

        visited_operators = OrderedDict()
        for o in root_operators:
            visited_operators = self.traverse_depth_first(o, visited_operators)

        return visited_operators.values()

    def traverse_breadth_first_from_root(self):
        """Returns a sorted list of the operators according to where they appear in the query plan, starting from
        the root operators.

        :return: Sorted list of operators
        """

        # Find the operators at the root of the graph, they will have no producers
        root_operators = self.find_root_operators()

        visited_operators = self.traverse_breadth_first(root_operators)

        return visited_operators

    def traverse_topological_from_root(self):
        visited_operators = self.traverse_topological()
        return visited_operators

    def find_root_operators(self):
        root_operators = []
        for o in self.operators.values():
            if not o.producers:
                root_operators.append(o)
        return root_operators

    def send(self, message, operator_name, sender_op):
        # NOTE: Have to check that the sender op is async otherwise won't be able to send any messages back to it
        # (which is needed to send dataframes)
        if self.use_shared_mem:
            if type(message) is list:
                for e in message[0]:
                    if type(e) is pd.DataFrame:
                        self.system.send(operator_name, DataFrameMessage(e), sender_op.worker)
                    else:
                        self.system.send(operator_name, e, sender_op.worker)
            else:
                self.system.send(operator_name, message, sender_op.worker)
        else:
            if type(message) is list and len(message) <= 0:
                return
            else:
                o = self.operators[operator_name]
                o.queue.put(cPickle.dumps(message, cPickle.HIGHEST_PROTOCOL))

    def print_metrics(self):

        print("")
        print("Metrics")
        print("-------")
        print("")

        print("Plan")
        print("----")

        print("buffer_size: {}".format(self.buffer_size))
        print("is_parallel: {}".format(self.is_async))
        print("total_elapsed_time: {}".format(round(self.total_elapsed_time, 5)))

        print("")
        print("Cost")
        print("----")
        self.print_cost_metrics()

        print("")
        print("Operator Completion Time")
        print("------------------------")
        for k, v in self.__debug_timer.items():
            print("{}: {}".format(k, v))

        print("")
        print("Operators")
        print("---------")

        OpMetrics.print_metrics(self.traverse_topological_from_root())

        # self.assert_operator_time_equals_plan_time()

        print("")

    def print_cost_metrics(self):
        scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost")]
        for op in scan_operators:
            if type(op) is TableRangeAccess:
                self.returned_bytes += op.op_metrics.bytes_returned
                self.num_http_get_requests += op.op_metrics.num_http_get_requests
            elif type(op) is TableScan:
                self.returned_bytes += op.op_metrics.bytes_returned
                self.num_http_get_requests += op.op_metrics.num_http_get_requests
            elif type(op) is SQLTableScan:
                self.sql_returned_bytes += op.op_metrics.bytes_returned
                self.sql_scanned_bytes += op.op_metrics.bytes_scanned
                self.num_http_get_requests += op.op_metrics.num_http_get_requests
            elif type(op) is SQLTableScanBloomUse:
                self.sql_returned_bytes += op.op_metrics.bytes_returned
                self.sql_scanned_bytes += op.op_metrics.bytes_scanned
                self.num_http_get_requests += op.op_metrics.num_http_get_requests
            else:
                # raise Exception("Unrecognized scan operator {}".format(type(op)))
                pass

        print("sql_scanned_bytes: {}".format(self.sql_scanned_bytes))
        print("sql_returned_bytes: {}".format(self.sql_returned_bytes))
        print("returned_bytes: {}".format(self.returned_bytes))
        print("num_http_get_requests: {}".format(self.num_http_get_requests))
        print("")

        cost, bytes_scanned, bytes_returned, http_requests, rows = self.cost()
        computation_cost = self.computation_cost()
        data_cost = self.data_cost()[0]
        print("total_scanned_bytes: {} MB".format(bytes_scanned * BYTE_TO_MB))
        print("total_returned_bytes: {} MB".format(bytes_returned * BYTE_TO_MB))
        print("total_http_get_requests: {}".format(http_requests))
        print("total_returned_rows: {}".format(rows))
        print("computation_cost: ${0:.10f}".format(computation_cost))
        print("data_cost: ${0:.10f}".format(data_cost))
        print("total_cost: ${0:.10f}".format(cost))

    def assert_operator_time_equals_plan_time(self):
        """Sanity check to make sure cumulative operator exec time approximately equals total plan exec time. We use a
        margin of error of 0.1 seconds to account for any time not captured during a context switch.

        Disabled when -O flag is set

        :return: None
        """

        if __debug__:

            margin = 0.1

            cum_op_time = 0.0
            for o in self.operators.values():
                cum_op_time += o.op_metrics.elapsed_time()

            upper_time_bound = self.total_elapsed_time * (1.0 + margin)
            lower_time_bound = self.total_elapsed_time * (1.0 - margin)

            assert lower_time_bound <= cum_op_time <= upper_time_bound, \
                "Cumulative operator execution time {} is not within {} seconds of total plan execution time {}".format(
                    margin, cum_op_time, self.total_elapsed_time)

    def write_graph(self, dir_, name):

        graph = Graph(name)

        for o in self.operators.values():
            graph.add_operator(o)

        graph.write(dir_)

    def execute(self):

        if self.is_async:
            map(lambda o: o.init_async(self.queue, self.system, self.use_shared_mem), self.operators.values())
            if self.use_shared_mem:
                self.system.start()
            else:
                map(lambda o: o.boot(), self.operators.values())

        # Find the root operators
        root_operators = self.find_root_operators()

        self.__timer.start()

        # Start the root operators
        for ro in root_operators:
            ro.start()

        # Things become event driven from here, because we may be running async, so we essentially go
        # into an event loop with a state machine to keep track of where we are
        # async_operators = {name: o for name, o in filter(lambda (name, o): o.async_, self.operators.items())}

        if self.is_async:
	    print("Operators: ")
	    print(self.operators)
            operator_completions = {k: False for k, v in self.operators.items()}
            while not all(operator_completions.values()):
                completed_message = self.listen(OperatorCompletedMessage)
                operator_completions[completed_message.name] = True
                self.debug_time(completed_message.name)

        self.__timer.stop()

        self.total_elapsed_time = self.__timer.elapsed()

        if self.is_async:
            # retrieve metrics of all operators. This is important in async queries since the operator runs in a
            # different process.
            operators = self.traverse_topological_from_root()
            for o in operators:
                if o.async_:
                    if self.use_shared_mem:
                        self.system.send(o.name, EvalMessage("self.op_metrics"), None)
                    else:
                        p_message = pickle.dumps(EvalMessage("self.op_metrics"))
                        o.queue.put(p_message)

                    evaluated_msg = self.listen(EvaluatedMessage)  # type: EvaluatedMessage
                    o.op_metrics = evaluated_msg.val
                else:
                    # Don't need to request metrics
                    pass

            map(lambda op: op.set_completed(True), self.operators.values())

    def listen(self, message_type):
        # type: (TypeVar[MessageBase]) -> MessageBase
        try:
            if self.use_shared_mem:
                msg = self.system.listen(message_type)
                return msg
            else:
                while True:
                    p_item = self.queue.get()
                    item = cPickle.loads(p_item)
                    # print(item)

                    if type(item) == message_type:
                        return item
                    else:
                        # Not the message being listened for, warn and skip
                        # This isn't exceptional, but the listener should be made aware that there are messages arriving that
                        # are being ignored
                        warning("While listening for message type {} received message type {} with contents {}".format(message_type, type(item), item))


        except BaseException as e:
            tb = traceback.format_exc(e)
            print(tb)

    def stop(self):
        if self.is_async:
            if self.use_shared_mem:
                self.system.send_all(StopMessage())
                self.system.join()
                self.system.close()
            else:
                map(lambda o: o.queue.put(cPickle.dumps(StopMessage())), filter(lambda o: o.async_, self.operators.values()))
                self.join()
                map(lambda o: o.queue.close(), filter(lambda o: o.async_, self.operators.values()))
        else:
            pass

    def join(self):
        return map(lambda o: o.join(), self.operators.values())

    def get_phase_runtime(self, phase_keyword=None):
        phase_operators = [op for op in self.operators.values() if phase_keyword in op.name]

        op_start_times = [op.op_metrics.start_time() for op in phase_operators]
        op_end_times = [op.op_metrics.elapsed_time() + op.op_metrics.start_time() for op in phase_operators]

        return max(op_end_times) - min(op_start_times)

    def cost(self):
        """
        calculates the overall query cost when runs on S3 by combining the cost of all scan operators in the query
        plus the computation cost based on the EC2 instance type and the query running time
        :return: the estimated cost of the whole query
        """
        total_data_cost, total_scanned_bytes, total_returned_bytes, total_http_requests, total_rows = self.data_cost()
        total_compute_cost = self.computation_cost()

        return total_compute_cost + total_data_cost, total_scanned_bytes, total_returned_bytes, total_http_requests,\
               total_rows

    def data_cost(self, ec2_region=None, phase_keyword=None):
        """
        calculates the estimated query cost when runs on S3 by combining the cost of all scan operators in the query
        :return: the estimated cost of the whole query
        """
        if phase_keyword is None:
            scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost")]
        else:
            scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost") and \
                              phase_keyword in op.name]

        total_data_cost = 0.0
        total_scanned_bytes = 0
        total_returned_bytes = 0
        total_http_requests = 0
        total_rows = 0

        for op in scan_operators:
            if op.is_completed():
                total_data_cost += op.op_metrics.data_cost(ec2_region)
                total_returned_bytes += op.op_metrics.bytes_returned
                total_scanned_bytes += op.op_metrics.bytes_scanned
                total_http_requests += op.op_metrics.num_http_get_requests
                total_rows += op.op_metrics.rows_returned
            else:
                raise Exception("Can't calculate query cost while one or more scan operators {} are still executing"
                                .format(op.name))

        return total_data_cost, total_scanned_bytes, total_returned_bytes, total_http_requests, total_rows

    def data_transfer_cost(self, ec2_region=None, phase_keyword=None):
        if phase_keyword is None:
            scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost")]
        else:
            scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost") and \
                              phase_keyword in op.name]

        total_data_transfer_cost = 0.0
        total_bytes_returned = 0
        total_returned_rows = 0

        for op in scan_operators:
            if op.is_completed():
                total_data_transfer_cost += op.op_metrics.data_transfer_cost()
                total_bytes_returned += op.op_metrics.bytes_returned
                total_returned_rows += op.op_metrics.rows_returned
            else:
                raise Exception("Can't calculate query cost while one or more scan operators {} are still executing"
                                .format(op.name))

        return total_bytes_returned, total_returned_rows, total_data_transfer_cost

    def data_scanning_cost(self, phase_keyword=None):
        if phase_keyword is None:
            scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost")]
        else:
            scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost") and \
                              phase_keyword in op.name]

        total_data_scanning_cost = 0.0
        total_bytes_scanned = 0

        for op in scan_operators:
            if op.is_completed():
                total_data_scanning_cost += op.op_metrics.data_scan_cost()
                total_bytes_scanned += op.op_metrics.bytes_scanned
            else:
                raise Exception("Can't calculate query cost while one or more scan operators {} are still executing"
                                .format(op.name))

        return total_bytes_scanned, total_data_scanning_cost

    def requests_cost(self, phase_keyword=None):
        if phase_keyword is None:
            scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost")]
        else:
            scan_operators = [op for op in self.operators.values() if hasattr(op.op_metrics, "cost") and \
                              phase_keyword in op.name]

        total_http_requests_cost = 0.0
        total_http_requests = 0

        for op in scan_operators:
            if op.is_completed():
                total_http_requests_cost += op.op_metrics.requests_cost()
                total_http_requests += op.op_metrics.num_http_get_requests
            else:
                raise Exception("Can't calculate query cost while one or more scan operators {} are still executing"
                                .format(op.name))

        return total_http_requests, total_http_requests_cost

    def computation_cost(self, ec2_instance_type=None, os_type=None):
        """
        calculates the estimated computation cost of the query
        :return: computation cost
        """
        scan_op = None

        for op in self.operators.values():
            if hasattr(op.op_metrics, "cost"):
                scan_op = op
                break

        return scan_op.op_metrics.computation_cost(self.total_elapsed_time, ec2_instance_type, os_type)

    def estimated_cost_for_config(self, ec2_region, ec2_instance_type=None, os_type=None):
        """
        estimate the hypothetical cost if the query runs on EC2 instance in a different region
        :param ec2_region: AWSRegion to calculate the cost for
        :param ec2_instance_type: the code of an EC2 instance as defined by AWS (r4.8xlarge)
        :param os_type: the name of the os running on the host machine (Linux, Windows ... etc)
        :return: the hypothetical estimated cost for the provided region, instance type and os
        """
        return self.computation_cost(ec2_instance_type, os_type) + self.data_cost(ec2_region)

    def debug_time(self, name):
        self.__debug_timer[name] = self.__timer.elapsed()

    def retrieve_sampling_threshold(self):
        sampling_filter_builder = [op for op in self.operators.values() if type(op) == TopKFilterBuild]
        if len(sampling_filter_builder) > 0:
            filter_builder = sampling_filter_builder[0]
            if filter_builder.async_:
                if self.use_shared_mem:
                    self.system.send(filter_builder.name, EvalMessage("self.threshold"), None)
                else:
                    p_message = pickle.dumps(EvalMessage("self.threshold"))
                    filter_builder.queue.put(p_message)

                evaluated_msg = self.listen(EvaluatedMessage)  # type: EvaluatedMessage
                filter_builder.threshold = evaluated_msg.val

                return filter_builder.threshold