예제 #1
0
    def mass_query_avg(self,
                       table,
                       x="ss_list_price",
                       y="ss_wholesale_cost",
                       percent=5,
                       number=default_mass_query_number,
                       b_random_queries=True):
        q_range_half_length = (self.q_max - self.q_min) * percent / 100.0 / 2.0
        self.logger.logger.info("Start generating queries")
        if b_random_queries:
            random.seed(1.0)
            query_centres = []
            for i in range(number):
                query_centres.append(random.uniform(self.q_min, self.q_max)[0])
        else:
            query_centres = generate_random.make_user_distribution(self.qe.kde,
                                                                   self.q_min,
                                                                   self.q_max,
                                                                   n=number)
        self.logger.logger.info("Finish generating " + str(number) +
                                " queries, the center points are:")
        self.logger.logger.info(str(query_centres))
        exact_results = []
        exact_times = []
        approx_results = []
        approx_times = []
        for i in range(number):
            self.logger.logger.info("start query No." + str(i + 1) +
                                    " out of " + str(number))
            # random.uniform(self.q_min, self.q_max)
            q_centre = query_centres[i]
            q_left = q_centre - q_range_half_length
            q_right = q_centre + q_range_half_length

            sqlStr = "SELECT AVG(" + y + ") FROM " + str(table) + " WHERE  " + \
                x + " BETWEEN " + str(q_left[0]) + " AND " + str(q_right[0])
            self.logger.logger.info(sqlStr)

            approx_result, approx_time = self.query_2d_avg(l=q_left, h=q_right)
            self.logger.logger.info(approx_result)
            exact_result, exact_time = self.query2mysql(sql=sqlStr)

            self.logger.logger.info(exact_result)
            if (exact_result is not None) and (exact_result is not 0):
                exact_results.append(exact_result)
                exact_times.append(exact_time)
                approx_results.append(approx_result)
                approx_times.append(approx_time)
            else:
                self.logger.logger.warning(
                    "MYSQL returns None, so this record is ignored.")
        self.logger.logger.warning("MYSQL query results: " +
                                   str(exact_results))
        self.logger.logger.warning("MYSQL query time cost: " +
                                   str(exact_times))
        self.logger.logger.warning("Approximate query results: " +
                                   str(approx_results))
        self.logger.logger.warning("Approximate query time cost: " +
                                   str(approx_times))
        return exact_results, approx_results, exact_times, approx_times
예제 #2
0
    def mass_query_correlation(self,table,x="ss_list_price",y="ss_wholesale_cost",percent=5,number=default_mass_query_number):
        q_range_half_length = (self.q_max-self.q_min)*percent/100.0/2.0
        # random.seed(1.0)
        random_left_boundary = self.q_min + q_range_half_length
        random_right_boundary = self.q_max - q_range_half_length
        query_centres = generate_random.make_user_distribution(self.qe.kde, 30, 100,n=number)
        exact_results = []
        exact_times = []
        approx_results = []
        approx_times = []
        for i in range(number):
            self.logger.logger.info("start query No."+str(i+1) +" out of "+str(number))
            q_centre = query_centres[i] #random.uniform(self.q_min,self.q_max)
            q_left = q_centre - q_range_half_length
            q_right = q_centre + q_range_half_length
            

            sqlStr = "SELECT CORR("+x+", "+y+") FROM " + str(table) +" WHERE  "+x+" BETWEEN " + str(q_left[0]) +" AND " +str(q_right[0])
            self.logger.logger.info(sqlStr)

            approx_result, approx_time = self.query_2d_correlation(l=q_left,h=q_right)
            self.logger.logger.info(approx_result)
            exact_result, exact_time = self.query2mysql(sql=sqlStr)

            self.logger.logger.info(exact_result)
            if (exact_result is not None) and (exact_result is not 0):
                exact_results.append(exact_result)
                exact_times.append(exact_time)
                approx_results.append(approx_result)
                approx_times.append(approx_time)
            else:
                self.logger.logger.warning("MYSQL returns None, so this record is ignored.")
        self.logger.logger.warning("MYSQL query results: " + str(exact_results))
        self.logger.logger.warning("MYSQL query time cost: " + str(exact_times))
        self.logger.logger.warning("Approximate query results: " + str(approx_results))
        self.logger.logger.warning("Approximate query time cost: " + str(approx_times))
        self.logger.logger.info("")
        return exact_results, approx_results, exact_times, approx_times
예제 #3
0
    def generate_queries(self,
                         table,
                         x="ss_list_price",
                         y="ss_wholesale_cost",
                         percent=5,
                         number=default_mass_query_number,
                         b_random_queries=False,
                         mode=2,
                         file_pre="etrade_"):
        """Summary

        Args:
            table (TYPE): TABLE name
            x (str, optional): x
            y (str, optional): y
            percent (int, optional): query width, or query domain
            number (TYPE, optional): number of queries generated
            b_random_queries (bool, optional): Description
            mode (str, optional): 1 for percentile queries, QRegQL;
                                  2 for other queries, QRegQL;
                                  1 for percentile queries, SQL or HIVEQL;
                                  2 for other queries, SQL or HIVEQL;
        """
        if mode is 1:
            file_name_qreg = file_pre + "percentile" + str(percent) + ".qreg"
            file_name_hiveql = file_pre + \
                "percentile" + str(percent) + ".hiveql"
        elif mode is 2:
            file_name_qreg = file_pre + "queries" + str(percent) + ".qreg"
            file_name_hiveql = file_pre + "queries" + str(percent) + ".hiveql"
        else:
            self.logger.logger.error(
                "Failed to generate queries, no mode selected!")
            sys.exit(errno.EPERM)
        q_range_half_length = (self.q_max - self.q_min) * percent / 100.0 / 2.0
        self.logger.logger.info("Start generating queries")
        if b_random_queries:
            random.seed(1.0)
            query_centres = []
            for i in range(number):
                if mode is 1:
                    # _start = (self.q_max - self.q_min) * 0.4
                    # query_centres.append(random.uniform(
                    #     self.q_min + _start, self.q_max)[0])
                    query_centres.append(random.uniform(0.4, 1))
                else:
                    query_centres.append(
                        random.uniform(self.q_min, self.q_max)[0])
        else:
            query_centres = generate_random.make_user_distribution(self.qe.kde,
                                                                   self.q_min,
                                                                   self.q_max,
                                                                   n=number)
        self.logger.logger.info("Finish generating " + str(number) +
                                " queries, the center points are:")
        self.logger.logger.info(str(query_centres))
        exact_results = []
        exact_times = []
        approx_results = []
        approx_times = []

        if mode is 1:
            with open(file_name_hiveql, 'w+') as f_hiveql:
                with open(file_name_qreg, 'w+') as f_qreg:
                    for i in range(number):
                        q_centre = query_centres[i]
                        qregStr = str(q_centre)
                        sqlStr = "SELECT percentile_cont(" + x + \
                            ", " + str(q_centre) + ") FROM " + str(table)
                        f_hiveql.write(sqlStr + "\n")
                        f_qreg.write(qregStr + "\n")
        else:
            with open(file_name_hiveql, 'w+') as f_hiveql:
                with open(file_name_qreg, 'w+') as f_qreg:
                    aggregates = ["COUNT", "SUM", "AVG", "MIN", "MAX"]
                    for aggregate in aggregates:
                        for i in range(number):
                            q_centre = query_centres[i]
                            q_left = q_centre - q_range_half_length
                            q_right = q_centre + q_range_half_length
                            qregStr = str(q_left[0]) + ", " + str(q_right[0])
                            sqlStr = "SELECT " + aggregate + "(" + y + ") FROM " + str(table) + " WHERE  " + \
                                x + " BETWEEN " + \
                                    str(q_left[0]) + " AND " + str(q_right[0])

                            f_hiveql.write(sqlStr + "\n")
                            if aggregate is "COUNT":
                                f_qreg.write(qregStr + "\n")
                    for i in range(number):
                        q_centre = query_centres[i]
                        q_left = q_centre - q_range_half_length
                        q_right = q_centre + q_range_half_length
                        qregStr = str(q_left[0]) + ", " + str(q_right[0])
                        sqlStr = "SELECT " + "VARIANCE" + "(" + x + ") FROM " + str(table) + " WHERE  " + \
                            x + " BETWEEN " + \
                                str(q_left[0]) + " AND " + str(q_right[0])

                        f_hiveql.write(sqlStr + "\n")

            self.logger.logger.info("Queries are written to file " +
                                    file_name_qreg + " and " +
                                    file_name_hiveql)