コード例 #1
0
 def update_after_transfer(self, mysql, neo4j_time, sub_p_rate, last_record):
     """Time the MySQL query against the Neo4j baseline and update rewards.

     The MySQL query runs in a worker thread and is given at most
     ``neo4j_time * self.neo4j_times`` seconds.  If it has not finished by
     then it is forcibly stopped and charged the full timeout; otherwise its
     measured time ``self.this_time_sql`` is used.  The (MySQL - Neo4j) time
     difference, weighted by each predicate's rate, is added into the reward
     matrix and the Q values are refreshed.

     :param mysql: SQL text handed to ``self.rdb_query_time``.
     :param neo4j_time: elapsed time of the matching Neo4j query (seconds).
     :param sub_p_rate: mapping of predicate -> share within the sub query.
     :param last_record: predicates transferred in the previous round.
     """
     print("neo4j  query time: " + str(neo4j_time))
     t = threading.Thread(target=self.rdb_query_time, args=(mysql,))
     t.start()
     # Bugfix: the timeout used to be wrapped in int(), which truncated any
     # sub-second budget to join(0) — i.e. no wait at all for fast queries.
     timeout = float(neo4j_time) * float(self.neo4j_times)
     try:
         t.join(timeout)
     except RuntimeError:
         # join() raises RuntimeError only when misused (joining the current
         # thread); dump diagnostics and bail out, as before.
         print(self.neo4j_times)
         print(neo4j_time)
         exit(0)
     print("mysql is still running:" + str(t.is_alive()))
     if t.is_alive():
         # MySQL overran its budget: kill it and charge the full timeout.
         stop_thread(t)
         mysql_time = timeout
         print("stop mysql immediately, time:" + str(mysql_time))
     else:
         mysql_time = self.this_time_sql
         print("mysql query successfully, time:" + str(mysql_time))
     # Positive improvement means MySQL was slower, i.e. the transfer paid off.
     improvement = mysql_time - neo4j_time
     for p in set(sub_p_rate.keys()):
         if p in last_record:
             # Predicate was already transferred last round: action (1, 0).
             self.reward[p][1][0] += improvement * sub_p_rate[p]
             self.update_q(1, 0, p)
         else:
             # Newly transferred predicate: action (0, 1).
             self.reward[p][0][1] += improvement * sub_p_rate[p]
             self.update_q(0, 1, p)
     self.this_time_sql = 0
コード例 #2
0
    def stop_polling(self):
        """Stop polling immediately.

        If a poll is in progress: restore the submit button to its start
        label, re-enable editing, log the stop, close the serial port, and
        force-terminate the polling worker thread if it is still alive.

        :return: None
        """
        # Nothing to do unless we are currently polling.
        if not self.isPolling:
            return

        # Clicking "stop polling" flips the button straight back to the
        # "start polling" label.
        self.ui.btn_submit.setText("开始轮询")
        self.set_enable(True)  # re-enable editing of the form
        self.isPolling = False
        self.append_info("结束轮询")  # report the stop on the info console

        self.close_serial()
        # Forcefully terminate the polling worker thread, if one is running.
        timer = self.polling_timer
        if timer is not None and timer.is_alive():
            util.stop_thread(timer)
コード例 #3
0
ファイル: LRU.py プロジェクト: database-ai4db-group/Dotil
    def lru(self):
        """Run the query workload under an LRU predicate-transfer policy.

        Online phase (per batch): queries whose sub-query predicates are all
        already in Neo4j are answered by Neo4j (plus a follow-up MySQL query
        over the sub-result); all other queries run directly on MySQL, with a
        ``self.max_time`` watchdog that force-stops over-long queries.

        Offline phase (per batch): the predicates of the batch's sub queries
        are imported into Neo4j; when the import would exceed the budget
        ``self.total_number * self.transfer_percent``, the least recently
        used predicates (front of ``self.lru_list``) are evicted first.

        :return: (total wall time, [], total online batch time, over-time
                  query numbers, online time minus over-time queries,
                  per-batch online time list)
        """
        self.total_batch_time = 0
        self.batch_time_list = []
        file_name, file_cypher_name, random_file_name = self.get_file_paths()
        # NOTE(review): these two handles are never closed — consider `with`.
        f_mysql = open(file_name, 'r', encoding='utf8')
        f_cypher = open(file_cypher_name, 'r', encoding='utf8')
        mysql_queries = f_mysql.readlines()
        cypher_queries = f_cypher.readlines()

        query_order = get_query_order(random_file_name, len(mysql_queries), self.query_order)
        wufenzhiyi = len(query_order) / 5  # batch size: one fifth of the workload
        total_start = time.perf_counter()
        for batch_number in range(5):
            start_batch_number = int(batch_number * wufenzhiyi)
            end_batch_number = int((batch_number + 1) * wufenzhiyi)
            # NOTE(review): dead branch — batch_number comes from range(5) and
            # can never be 9; probably a leftover from a 10-batch variant.
            if batch_number == 9:
                end_batch_number = len(mysql_queries) - 1
            batch_p_list = set()  # untransferred predicates seen in this batch
            batch_start = time.perf_counter()
            total_batch_start = time.perf_counter()
            for query_number in query_order[start_batch_number:end_batch_number]:
                print("===================================================")
                print("start query online:" + str(query_number))
                fail_start = time.perf_counter()
                # First decide whether the query contains a sub-query structure.
                flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                    find_oname(mysql_queries[query_number], cypher_queries[query_number])
                print(new_mysql)
                print(new_cypher)
                print(other_mysql)
                print(length)
                # No sub structure: run the whole query on MySQL.
                if not flag:
                    print("There is no sub query struct, start MySQL Query instead")
                    t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number],))
                    t.start()
                    block_time = self.max_time
                    t.join(block_time)
                    if t.is_alive():
                        # Query overran its budget: kill it and charge max_time.
                        stop_thread(t)
                        mysql_time = block_time
                        print("Stop the mysql query immediately, time:" + str(mysql_time))
                        # self.mysql_query_total_time += mysql_time
                        fail_end = time.perf_counter()
                        self.overtime_query[query_number] = fail_end - fail_start
                        self.rdb_batch_time[batch_number] += self.max_time
                    else:
                        mysql_time = self.this_time_sql
                        print("mysql query successfully , time:" + str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                        self.rdb_batch_time[batch_number] += mysql_time
                    breakdown_time = time.perf_counter()
                    self.breakdown[query_number] = breakdown_time - total_start
                    print("immediately mysql query over, start next query")
                    continue
                else:
                    print("detected the sub query struct")

                # Share of each predicate within the sub query.
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rates: " + str(sub_p_rate))
                for p in sub_p_list:  # collect predicates not yet transferred
                    if not self.transfer_record[p]:
                        batch_p_list.add(p)

                # Check whether every predicate is already in Neo4j.
                transfer_flag = False
                for p in sub_p_list:
                    if not p in self.lru_list:  # True as soon as one is missing
                        transfer_flag = True
                self.show_transferred_p()
                # All predicates transferred: answer the sub query with Neo4j.
                if not transfer_flag:  # none missing — everything transferred
                    for p in sub_p_list:
                        # Touch each predicate: move it to the MRU end of the list.
                        self.lru_list.remove(p)
                        self.lru_list.append(p)
                    print("All p in sub query is transferred, use Neo4j to query")
                    t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                    t2.start()
                    t2.join(100)  # hard 100 s cap on the Neo4j query
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!")
                        self.gdb_batch_time[batch_number] += 100
                        continue
                    else:
                        self.gdb_batch_time[batch_number] += self.this_time_neo
                        print("neo4j query success")

                    neo4j_time, result_list = self.this_time_neo, self.this_time_results
                    # self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate)
                    if len(result_list) != 0:
                        # Feed the Neo4j sub-result into the remaining MySQL query.
                        t1 = threading.Thread(target=self.rdb_second_query, args=(other_mysql, result_list))
                        t1.start()
                        t1.join(self.max_time)
                        if t1.is_alive():
                            stop_thread(t1)
                            print("Final mysql query overtime:" + str(self.max_time))
                            fail_end = time.perf_counter()
                            self.overtime_query[query_number] = fail_end - fail_start
                            self.rdb_batch_time[batch_number] += self.max_time
                        else:
                            self.rdb_batch_time[batch_number] += self.query_time
                            print("Final query run successfully:" + str(self.query_time))
                    else:
                        print("Because of the sub query result length is 0, so jump over the other query")
                    continue
                # At least one predicate is missing: run the query on MySQL.
                else:
                    print("have some p not in neo4j, choose to use mysql")
                    for p in sub_p_list:
                        if p in self.lru_list:
                            # Still touch the transferred predicates (LRU update).
                            self.lru_list.remove(p)
                            self.lru_list.append(p)
                    t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number],))
                    t.start()
                    block_time = self.max_time
                    t.join(block_time)
                    print("mysql is still running:" + str(t.is_alive()))
                    if t.is_alive():
                        stop_thread(t)
                        mysql_time = block_time
                        print("force stop mysql, time:" + str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                        fail_end = time.perf_counter()
                        self.overtime_query[query_number] = fail_end - fail_start
                        self.rdb_batch_time[batch_number] += self.max_time
                    else:
                        mysql_time = self.this_time_sql
                        self.rdb_batch_time[batch_number] += mysql_time
                        print("mysql query successfully, time:" + str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                    breakdown_time = time.perf_counter()
                    self.breakdown[query_number] = breakdown_time - total_start
                    continue
            batch_end = time.perf_counter()
            self.total_batch_time += batch_end - batch_start
            self.batch_time_list.append(batch_end - batch_start)
            print("The No." + str(batch_number) + " batch query Online over, time :" + str(batch_end - batch_start))
            # ==========================================================================================================
            # Online part of the batch is done; start the Offline transfer phase.
            print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
            print("Start process No." + str(batch_number) + "batch transferring")
            offline_training_time = time.perf_counter()
            for query_number in query_order[start_batch_number:end_batch_number]:
                print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
                print("start process number: " + str(query_number))
                # if query_number < 10 or query_number > 15:
                #     continue
                # Detect the sub-query structure again for the offline pass.
                flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                    find_oname(mysql_queries[query_number], cypher_queries[query_number])
                if not flag:  # no sub structure — nothing to transfer
                    continue
                # Check whether the sub-query predicates are all in the graph DB.
                transfer_flag = False
                for p in sub_p_list:
                    if p not in self.lru_list:  # True as soon as one is missing
                        transfer_flag = True
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rate of the sub query" + str(sub_p_rate))
                if not transfer_flag:  # everything already transferred
                    for p in sub_p_list:
                        # Touch the predicates so they become most recently used.
                        self.lru_list.remove(p)
                        self.lru_list.append(p)
                    continue

                # Decide what to transfer (original comment said "Q learning",
                # but this LRU variant simply transfers every sub-query predicate).
                need_transfer = set()
                for p in sub_p_list:
                    need_transfer.add(p)
                sub_total_number = 0
                for p in set(need_transfer):
                    if p not in self.lru_list:
                        sub_total_number += int(self.numbers[p])
                # Evict LRU predicates until the incoming data fits the budget.
                # NOTE(review): raises IndexError if lru_list empties while
                # sub_total_number alone still exceeds the budget — confirm.
                while self.total_neo4j_number + sub_total_number >= self.total_number * self.transfer_percent:
                    print("Because of the data in Neo4j is too much, now trying to delete some p")
                    need_retrieve_p = self.lru_list[0]
                    p_number = int(self.numbers[need_retrieve_p])
                    self.total_neo4j_number -= p_number
                    self.lru_list.remove(need_retrieve_p)

                # Re-import the surviving already-transferred predicates too.
                for p in self.lru_list:
                    need_transfer.add(p)

                # delete original transfer record
                self.lru_list.clear()
                print("need transfer: " + str(need_transfer))
                # Perform the transfer into Neo4j.
                print("transfering")
                transfer_batch_time = transfer_neo4j_import(set(need_transfer), self.flag)
                self.transfer_batch_time[batch_number] += transfer_batch_time
                for p in need_transfer:
                    self.lru_list.append(p)
                self.transfer_total_time += transfer_batch_time
                print("transfer over")
            offline_training_time_end = time.perf_counter()
            # Training time excludes the pure data-transfer time of the batch.
            self.training_batch_time[batch_number] += offline_training_time_end - offline_training_time - \
                                                      self.transfer_batch_time[batch_number]
            total_batch_end = time.perf_counter()
            self.offandon_batch_time[batch_number] += total_batch_end - total_batch_start
       # self.show_matrix()
        print('\n\n\n')
        total_end = time.perf_counter()
        # Strip the over-time queries out of the total to estimate the time
        # the successful queries alone took.
        expect_failed = self.total_batch_time
        overtime_list = []
        for number in self.overtime_query.keys():
            overtime_list.append(number)
            expect_failed -= self.overtime_query[number]
        print("over time queries:" + str(overtime_list))
        print("total time after strip:" + str(expect_failed))
        print(self.batch_time_list)
        self.save()
        return total_end - total_start, [], self.total_batch_time, overtime_list, expect_failed, self.batch_time_list
コード例 #4
0
    def random_policy(self):
        """Baseline policy: after each batch, transfer a random selection.

        Online phase: same routing as the other policies — Neo4j when all
        sub-query predicates are transferred, MySQL (with a ``self.max_time``
        watchdog) otherwise.  Offline phase: randomly samples queries from
        all batches processed so far and transfers their predicates until
        the budget ``self.total_number * self.transfer_percent`` is filled.

        :return: (total wall time, summed Q values [q00, q01, q10, q11],
                  total online batch time)
        """
        mysql_queries, cypher_queries, query_order = self._q_learning_init()
        batch_number = int(self.cf.get("rdb-gdb", "batch_number"))
        one_batch_length = len(query_order) / batch_number
        total_start = time.perf_counter()  # start: Total Time
        # NOTE(review): the loop variable shadows the batch count read above.
        for batch_number in range(batch_number):
            start_batch_number = int(batch_number * one_batch_length)
            end_batch_number = int((batch_number + 1) * one_batch_length)
            # Last batch of a 10-batch run absorbs the remainder of the list.
            if batch_number == 9:
                end_batch_number = len(mysql_queries) - 1
            batch_p_list = set()  # untransferred predicates seen in this batch
            batch_start = time.perf_counter()  # start: Batch Time
            for query_number in query_order[
                    start_batch_number:end_batch_number]:
                print("===================================================")
                print("start query online:" + str(query_number))
                # First, detect the sub-query structure.
                flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                    find_oname(mysql_queries[query_number], cypher_queries[query_number])
                p_exist_flag = True
                if flag:
                    for p in sub_p_list:
                        if p not in self.transfer_record:
                            p_exist_flag = False
                # No sub structure (or unknown predicate): plain MySQL query.
                if not flag or not p_exist_flag:
                    print(
                        "There is no sub query struct, start MySQL Query instead"
                    )
                    t = threading.Thread(target=self.rdb_query_time,
                                         args=(mysql_queries[query_number], ))
                    t.start()
                    t.join(self.max_time)
                    if t.is_alive():
                        # Query overran its budget: kill it and charge max_time.
                        stop_thread(t)
                        mysql_time = self.max_time
                        print("Stop the mysql query immediately, time:" +
                              str(mysql_time))
                    else:
                        mysql_time = self.this_time_sql
                        print("mysql query successfully , time:" +
                              str(mysql_time))
                    print("immediately mysql query over, start next query")
                    continue
                else:
                    print("detected the sub query struct")
                print("Sub sql query: " + new_mysql)
                print("Sub cypher query: " + new_cypher)
                print("Other query: " + other_mysql)
                print("Length: " + str(length))

                # Share of each predicate within the sub query.
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rates: " + str(sub_p_rate))
                for p in sub_p_list:  # collect predicates not yet transferred
                    if not self.transfer_record[p]:
                        batch_p_list.add(p)

                # Check whether every predicate is already in Neo4j.
                transfer_flag = False
                for p in sub_p_list:
                    if not self.transfer_record[p]:  # True once one is missing
                        transfer_flag = True
                self.show_transferred_p()
                # All predicates transferred: answer the sub query with Neo4j.
                if not transfer_flag:  # none missing — everything transferred
                    print(
                        "All p in sub query is transferred, use Neo4j to query"
                    )
                    t2 = threading.Thread(target=self.neo_query_time,
                                          args=(new_cypher, length))
                    t2.start()
                    t2.join(100)  # hard 100 s cap on the Neo4j query
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!")
                        continue
                    else:
                        print("Neo4j query success")

                    neo4j_time, result_list = self.this_time_neo, self.this_time_neo4j_results
                    # self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate)
                    if len(result_list) != 0:
                        # Feed the Neo4j sub-result into the remaining MySQL query.
                        t1 = threading.Thread(target=self.rdb_second_query,
                                              args=(other_mysql, result_list))
                        t1.start()
                        t1.join(self.max_time)
                        if t1.is_alive():
                            stop_thread(t1)
                            print("Final mysql query overtime:" +
                                  str(self.max_time))
                        else:
                            print("Final query run successfully:" +
                                  str(self.second_query_time))
                    else:
                        print(
                            "Because of the sub query result length is 0, so jump over the other query"
                        )
                    continue
                # At least one predicate is missing: run the query on MySQL.
                else:
                    print("Have some p not in neo4j, choose to use mysql")
                    t = threading.Thread(target=self.rdb_query_time,
                                         args=(mysql_queries[query_number], ))
                    t.start()
                    t.join(self.max_time)
                    print("Mysql is still running:" + str(t.is_alive()))
                    if t.is_alive():
                        stop_thread(t)
                        print("Force stop mysql, time:" + str(self.max_time))
                    else:
                        print("Mysql query successfully, time:" +
                              str(self.this_time_sql))
                    continue
            batch_end = time.perf_counter()  # end: Batch Time
            self.total_batch_time += batch_end - batch_start
            self.batch_time_list.append(batch_end - batch_start)
            print("The No." + str(batch_number) +
                  " batch query Online over, time :" +
                  str(batch_end - batch_start))
            self.run_batches += 1
            # NOTE(review): hard-coded stop after 6 batches — confirm intended.
            if self.run_batches == 6:
                break
            # ==========================================================================================================
            # Online part of the batch is done; start the Offline transfer phase.
            print(
                "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
            )
            print("Start process No." + str(batch_number) +
                  "batch transferring")
            # Random transfer policy: after every batch, randomly re-select
            # queries from all batches seen so far and transfer their predicates.
            former_sql = mysql_queries[:end_batch_number]
            former_cypher = cypher_queries[:end_batch_number]
            need_transfer_p_set = set()
            random_choose = random.sample(
                range(len(former_sql)), end_batch_number - start_batch_number)
            # Reset every transfer mark before re-selecting.
            for p in self.transfer_record:
                self.transfer_record[p] = False
            self.total_neo4j_number = 0
            # Add the predicates of the randomly chosen queries to the set.
            full_flag = False
            for num in random_choose:
                flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                    find_oname(former_sql[num], former_cypher[num])
                print(flag)
                print(sub_p_list)
                if flag:
                    for p in sub_p_list:
                        if not full_flag and p in self.transfer_record:
                            need_transfer_p_set.add(p)
                            self.total_neo4j_number += int(self.numbers[p])
                            self.transfer_record[p] = True
                        else:
                            # NOTE(review): breaks on the first unknown p (or
                            # once full) instead of skipping it — confirm.
                            break
                        if int(
                                self.numbers[p]
                        ) + self.total_neo4j_number >= self.total_number * self.transfer_percent:
                            full_flag = True
                if full_flag:
                    break
            print(random_choose)
            print(need_transfer_p_set)
            if len(need_transfer_p_set) != 0:
                self.transfer_neo4j_import_no_merge(need_transfer_p_set)

        total_end = time.perf_counter()  # end: Total Time
        # Sum the Q matrix over all predicates for reporting.
        q_values = []
        q00 = 0
        q01 = 0
        q10 = 0
        q11 = 0
        for p in self.q.keys():
            q00 += self.q[p][0][0]
            q01 += self.q[p][0][1]
            q10 += self.q[p][1][0]
            q11 += self.q[p][1][1]
        q_values.append(q00)
        q_values.append(q01)
        q_values.append(q10)
        q_values.append(q11)
        print(self.batch_time_list)
        # NOTE(review): the record is labelled "Q Learning" although this is
        # the random policy — confirm the label is intentional.
        with open("record128.txt", "a", encoding="utf8") as fw:
            fw.write("Q Learning:" + str(self.batch_time_list) + "\n\n")
        return total_end - total_start, q_values, self.total_batch_time
コード例 #5
0
    def foresee(self):
        """Foresight policy: pre-transfer the batch's most frequent predicates.

        Before each batch runs, the sub-query predicate lists of the batch's
        queries are counted; the most frequent list is imported into Neo4j.
        The batch is then executed online with the usual routing: Neo4j when
        all predicates of a query's sub structure are transferred, MySQL
        (with a ``self.max_time`` watchdog) otherwise.

        :return: (total wall time, summed Q values [q00, q01, q10, q11],
                  total online batch time, over-time query numbers, online
                  time minus over-time queries, per-batch online time list)
        """
        self.batch_time_list = []
        file_name, file_cypher_name, random_file_name = self.get_file_paths()
        # NOTE(review): these two handles are never closed — consider `with`.
        f_mysql = open(file_name, 'r', encoding='utf8')
        f_cypher = open(file_cypher_name, 'r', encoding='utf8')
        mysql_queries = f_mysql.readlines()
        cypher_queries = f_cypher.readlines()

        query_order = get_query_order(random_file_name, len(mysql_queries),
                                      self.query_order)
        wufenzhiyi = len(query_order) / 5  # batch size: one fifth of the workload
        total_start = time.perf_counter()
        for batch_number in range(5):
            start_batch_number = int(batch_number * wufenzhiyi)
            end_batch_number = int((batch_number + 1) * wufenzhiyi)
            # NOTE(review): dead branch — batch_number comes from range(5) and
            # can never be 9; probably a leftover from a 10-batch variant.
            if batch_number == 9:
                end_batch_number = len(mysql_queries) - 1
            batch_p_list = set()  # untransferred predicates seen in this batch

            total_batch_start = time.perf_counter()
            print("+++++++++++++++++++++++++foresee+++++++++++++++++++++++")
            foresee_dict = {}
            foresee_number_dict = {}
            for p in self.transfer_record.keys():
                self.transfer_record[p] = False
            # Count how often each distinct predicate list occurs in the batch.
            for query_number in query_order[
                    start_batch_number:end_batch_number]:
                flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                    find_oname(mysql_queries[query_number], cypher_queries[query_number])
                if not flag:
                    continue
                list_hash = hash(tuple(sub_p_list))
                if list_hash not in foresee_number_dict:
                    foresee_number_dict[list_hash] = sub_p_list
                # NOTE(review): the first occurrence is counted as 0, so the
                # counts are off by one; the argmax is unaffected.
                if list_hash in foresee_dict:
                    foresee_dict[list_hash] += 1
                else:
                    foresee_dict[list_hash] = 0
            print("Foresee number list:" + str(foresee_number_dict))
            print("Foresee Dict:" + str(foresee_dict))
            # Transfer the most frequent predicate list for this batch.
            for key, value in foresee_dict.items():
                if value == max(foresee_dict.values()):
                    print("Foresee p list:" + str(key))
                    for p in foresee_number_dict[key]:
                        self.transfer_record[p] = True
                    # NOTE(review): transfer_batch_time is assigned but never
                    # accumulated or returned here — confirm intended.
                    transfer_batch_time = transfer_neo4j_import(
                        set(foresee_number_dict[key]), self.flag)
                    break
            batch_start = time.perf_counter()
            for query_number in query_order[
                    start_batch_number:end_batch_number]:
                print("===================================================")
                print("start query online:" + str(query_number))
                fail_start = time.perf_counter()
                # First, detect the sub-query structure.
                flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                    find_oname(mysql_queries[query_number], cypher_queries[query_number])
                print(new_mysql)
                print(new_cypher)
                print(other_mysql)
                print(length)
                # No sub structure: run the whole query on MySQL.
                if not flag:
                    print(
                        "There is no sub query struct, start MySQL Query instead"
                    )
                    t = threading.Thread(target=self.rdb_query_time,
                                         args=(mysql_queries[query_number], ))
                    t.start()
                    block_time = self.max_time
                    t.join(block_time)
                    if t.is_alive():
                        # Query overran its budget: kill it and charge max_time.
                        stop_thread(t)
                        mysql_time = block_time
                        print("Stop the mysql query immediately, time:" +
                              str(mysql_time))
                        # self.mysql_query_total_time += mysql_time
                        fail_end = time.perf_counter()
                        self.overtime_query[
                            query_number] = fail_end - fail_start
                        self.rdb_batch_time[batch_number] += self.max_time
                    else:
                        mysql_time = self.this_time_sql
                        print("mysql query successfully , time:" +
                              str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                        self.rdb_batch_time[batch_number] += mysql_time
                    breakdown_time = time.perf_counter()
                    self.breakdown[query_number] = breakdown_time - total_start
                    print("immediately mysql query over, start next query")
                    continue
                else:
                    print("detected the sub query struct")

                # Share of each predicate within the sub query.
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rates: " + str(sub_p_rate))
                for p in sub_p_list:  # collect predicates not yet transferred
                    if not self.transfer_record[p]:
                        batch_p_list.add(p)

                # Check whether every predicate is already in Neo4j.
                transfer_flag = False
                for p in sub_p_list:
                    if not self.transfer_record[p]:  # True once one is missing
                        transfer_flag = True
                self.show_transferred_p()
                # All predicates transferred: answer the sub query with Neo4j.
                if not transfer_flag:  # none missing — everything transferred
                    print(
                        "All p in sub query is transferred, use Neo4j to query"
                    )
                    t2 = threading.Thread(target=self.neo_query_time,
                                          args=(new_cypher, length))
                    t2.start()
                    t2.join(100)  # hard 100 s cap on the Neo4j query
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!")
                        self.gdb_batch_time[batch_number] += 100
                        continue
                    else:
                        self.gdb_batch_time[batch_number] += self.this_time_neo
                        print("neo4j query success")

                    neo4j_time, result_list = self.this_time_neo, self.this_time_results
                    # self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate)
                    if len(result_list) != 0:
                        # Feed the Neo4j sub-result into the remaining MySQL query.
                        t1 = threading.Thread(target=self.rdb_second_query,
                                              args=(other_mysql, result_list))
                        t1.start()
                        t1.join(self.max_time)
                        if t1.is_alive():
                            stop_thread(t1)
                            print("Final mysql query overtime:" +
                                  str(self.max_time))
                            fail_end = time.perf_counter()
                            self.overtime_query[
                                query_number] = fail_end - fail_start
                            self.rdb_batch_time[batch_number] += self.max_time
                        else:
                            self.rdb_batch_time[
                                batch_number] += self.query_time
                            print("Final query run successfully:" +
                                  str(self.query_time))
                    else:
                        print(
                            "Because of the sub query result length is 0, so jump over the other query"
                        )
                    continue
                # At least one predicate is missing: run the query on MySQL.
                else:
                    print("have some p not in neo4j, choose to use mysql")
                    t = threading.Thread(target=self.rdb_query_time,
                                         args=(mysql_queries[query_number], ))
                    t.start()
                    block_time = self.max_time
                    t.join(block_time)
                    print("mysql is still running:" + str(t.is_alive()))
                    if t.is_alive():
                        stop_thread(t)
                        mysql_time = block_time
                        print("force stop mysql, time:" + str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                        fail_end = time.perf_counter()
                        self.overtime_query[
                            query_number] = fail_end - fail_start
                        self.rdb_batch_time[batch_number] += self.max_time
                    else:
                        mysql_time = self.this_time_sql
                        self.rdb_batch_time[batch_number] += mysql_time
                        print("mysql query successfully, time:" +
                              str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                    breakdown_time = time.perf_counter()
                    self.breakdown[query_number] = breakdown_time - total_start
                    continue
            batch_end = time.perf_counter()
            self.total_batch_time += batch_end - batch_start
            self.batch_time_list.append(batch_end - batch_start)
            print("The No." + str(batch_number) +
                  " batch query Online over, time :" +
                  str(batch_end - batch_start))
            total_batch_end = time.perf_counter()
            self.offandon_batch_time[
                batch_number] += total_batch_end - total_batch_start

    # self.show_matrix()
        print('\n\n\n')
        total_end = time.perf_counter()
        # self.db.close()
        # Sum the Q matrix over all predicates for reporting.
        q_values = []
        q00 = 0
        q01 = 0
        q10 = 0
        q11 = 0
        for p in self.q.keys():
            q00 += self.q[p][0][0]
            q01 += self.q[p][0][1]
            q10 += self.q[p][1][0]
            q11 += self.q[p][1][1]
        q_values.append(q00)
        q_values.append(q01)
        q_values.append(q10)
        q_values.append(q11)

        # Strip the over-time queries out of the total to estimate the time
        # the successful queries alone took.
        expect_failed = self.total_batch_time
        overtime_list = []
        for number in self.overtime_query.keys():
            overtime_list.append(number)
            expect_failed -= self.overtime_query[number]
        print("over time queries:" + str(overtime_list))
        print("total time after strip:" + str(expect_failed))
        print(self.batch_time_list)
        self.save()
        print("RDB time: " + str(self.rdb_batch_time))
        print("GDB time: " + str(self.gdb_batch_time))
        print("Transfer time: " + str(self.transfer_batch_time))
        print("Training time: " + str(self.training_batch_time))
        print("Offline + Online Batch time: " + str(self.offandon_batch_time))

        # Reset the per-batch accumulators for the next run.
        self.rdb_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
        self.gdb_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
        self.transfer_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
        self.training_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
        self.offandon_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}

        return total_end - total_start, q_values, self.total_batch_time, overtime_list, expect_failed, self.batch_time_list
# Code example #6
    def q_learning(self):
        """Run the Q-learning driven hybrid query workload in batches.

        Each batch has two phases:
          * Online phase: for every query in the batch, detect a transferable
            sub-query structure; route it to Neo4j if all its predicates (p)
            are already transferred, otherwise fall back to MySQL. Every
            query runs in a worker thread with a timeout (`self.max_time`);
            overtime threads are force-killed via `stop_thread`.
          * Offline phase: replay the batch to update rewards/Q-values,
            decide (by accumulated reward, with epsilon-style randomness via
            `self.prob`) which predicates to transfer into Neo4j, evict
            low-reward predicates when the Neo4j data volume would exceed
            `self.total_number * self.transfer_percent`, perform the
            transfer, and update the Q table via `update_after_transfer`.

        Returns:
            (total_elapsed_seconds, q_values, total_batch_time) where
            q_values is [sum(Q[p][0][0]), sum(Q[p][0][1]),
                         sum(Q[p][1][0]), sum(Q[p][1][1])] over all p.

        Side effects: mutates self.transfer_record, self.reward, self.q,
        self.total_neo4j_number, self.batch_time_list, self.total_batch_time;
        appends a line to "record128.txt".
        """
        mysql_queries, cypher_queries, query_order = self._q_learning_init()
        batch_number = int(self.cf.get("rdb-gdb", "batch_number"))
        one_batch_length = len(query_order) / batch_number
        total_start = time.perf_counter()  # start: Total Time
        # NOTE(review): the loop variable shadows the total batch count read
        # from config above. This works because range() is evaluated once,
        # but it is confusing — consider renaming one of them.
        for batch_number in range(batch_number):
            start_batch_number = int(batch_number * one_batch_length)
            end_batch_number = int((batch_number + 1) * one_batch_length)
            # NOTE(review): hard-coded last-batch index assumes exactly 10
            # batches, and `len(...) - 1` looks like it drops the final
            # query (Python slices already exclude the end index) — confirm.
            if batch_number == 9:
                end_batch_number = len(mysql_queries) - 1
            batch_p_list = set()  # predicates (p) touched in this batch
            batch_start = time.perf_counter()  # start: Batch Time
            for query_number in query_order[start_batch_number:end_batch_number]:
                print("===================================================")
                print("start query online:" + str(query_number))
                # First, try to detect a transferable sub-query structure.
                flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                    find_oname(mysql_queries[query_number], cypher_queries[query_number])
                p_exist_flag = True
                if flag:
                    for p in sub_p_list:
                        if p not in self.transfer_record:
                            p_exist_flag = False
                # No sub-structure (or an unknown predicate): run on MySQL.
                if not flag or not p_exist_flag:
                    print("There is no sub query struct, start MySQL Query instead")
                    t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number],))
                    t.start()
                    t.join(self.max_time)
                    if t.is_alive():
                        # Query exceeded the time budget: force-kill the
                        # thread and charge the maximum time.
                        stop_thread(t)
                        mysql_time = self.max_time
                        print("Stop the mysql query immediately, time:" + str(mysql_time))
                    else:
                        mysql_time = self.this_time_sql
                        print("mysql query successfully , time:" + str(mysql_time))
                    print("immediately mysql query over, start next query")
                    continue
                else:
                    print("detected the sub query struct")
                print("Sub sql query: " + new_mysql)
                print("Sub cypher query: " + new_cypher)
                print("Other query: " + other_mysql)
                print("Length: " + str(length))

                # Get each predicate's share of the sub-query.
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rates: " + str(sub_p_rate))
                for p in sub_p_list:  # collect predicates not yet transferred
                    if not self.transfer_record[p]:
                        batch_p_list.add(p)

                # Check whether every predicate is already in Neo4j.
                transfer_flag = False
                for p in sub_p_list:
                    if not self.transfer_record[p]:  # True if any p is not transferred
                        transfer_flag = True
                self.show_transferred_p()
                # All predicates transferred: answer the sub-query from Neo4j.
                if not transfer_flag:  # everything is already transferred
                    print("All p in sub query is transferred, use Neo4j to query")
                    t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                    t2.start()
                    # NOTE(review): Neo4j timeout is hard-coded to 100s here
                    # instead of self.max_time — confirm this is intended.
                    t2.join(100)
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!")
                        continue
                    else:
                        print("Neo4j query success")

                    neo4j_time, result_list = self.this_time_neo, self.this_time_neo4j_results
                    # self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate)
                    if len(result_list) != 0:
                        # Join the Neo4j sub-result back into MySQL for the
                        # remainder of the original query.
                        t1 = threading.Thread(target=self.rdb_second_query, args=(other_mysql, result_list))
                        t1.start()
                        t1.join(self.max_time)
                        if t1.is_alive():
                            stop_thread(t1)
                            print("Final mysql query overtime:" + str(self.max_time))
                        else:
                            print("Final query run successfully:" + str(self.second_query_time))
                    else:
                        print("Because of the sub query result length is 0, so jump over the other query")
                    continue
                # Some predicate is not in Neo4j: run the whole query on MySQL.
                else:
                    print("Have some p not in neo4j, choose to use mysql")
                    t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number],))
                    t.start()
                    t.join(self.max_time)
                    print("Mysql is still running:" + str(t.is_alive()))
                    if t.is_alive():
                        stop_thread(t)
                        print("Force stop mysql, time:" + str(self.max_time))
                    else:
                        print("Mysql query successfully, time:" + str(self.this_time_sql))
                    continue
            batch_end = time.perf_counter()  # end: Batch Time
            self.total_batch_time += batch_end - batch_start
            self.batch_time_list.append(batch_end - batch_start)
            print("The No." + str(batch_number) + " batch query Online over, time :" + str(batch_end - batch_start))
            # ==========================================================================================================
            # Online phase of this batch is done; start the offline
            # (reward update / transfer decision) phase.
            print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
            print("Start process No." + str(batch_number) + "batch transferring")
            for query_number in query_order[start_batch_number:end_batch_number]:
                print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
                print("Start process number: " + str(query_number))
                # Re-detect the sub-query structure for this query.
                flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                    find_oname(mysql_queries[query_number], cypher_queries[query_number])

                p_exist_flag = True
                if flag:
                    for p in sub_p_list:
                        if p not in self.transfer_record:
                            p_exist_flag = False
                # No sub-structure: nothing to learn from this query.
                if not flag or not p_exist_flag:
                    continue
                # Snapshot of predicates currently in Neo4j, used later to
                # decide which reward cell (transfer vs. stay) to credit.
                last_time_record = []
                for p in self.transfer_record.keys():
                    if self.transfer_record[p]:
                        last_time_record.append(p)
                print("New SQL:" + new_mysql)
                print("New Cypher:" + new_cypher)
                print("Other SQL:" + other_mysql)
                # Check whether the sub-query predicates are all in the graph DB.
                transfer_flag = False
                for p in sub_p_list:
                    if not self.transfer_record[p]:  # True if any p is not transferred
                        transfer_flag = True
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rate of the sub query" + str(sub_p_rate))
                if not transfer_flag:  # everything is already transferred
                    print("All p in sub query are in Neo4j")
                    t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                    t2.start()
                    t2.join(100)
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!" + str(self.max_time))
                        continue
                    else:
                        print("Neo4j query success")
                    neo4j_time, result_list = self.this_time_neo, self.this_time_neo4j_results

                    self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate, last_time_record)
                    continue

                # Use Q-learning to decide whether to transfer.
                need_transfer = set()
                # Sum the "stay -> transfer" rewards over the sub-query's
                # predicates; positive means transferring has paid off.
                total_reward = 0
                for p in sub_p_list:
                    total_reward += self.reward[p][0][1]
                print("Total reward: " + str(total_reward))
                if total_reward > 0:
                    print("Total reward > 0")
                    for p in sub_p_list:
                        need_transfer.add(p)
                elif total_reward == 0:
                    if random.randint(1, 101) < 100 * self.prob:  # reward == 0: explore randomly
                        print("Total reward = 0, random choose to transfer")
                        for p in sub_p_list:
                            need_transfer.add(p)
                    else:
                        print("Total reward = 0, random choose to continue")
                else:
                    print("Total reward < 0, choose to continue")

                if len(need_transfer) == 0:
                    print("No p to transfer, continue")
                    continue

                # Capacity check: evict low-reward predicates from Neo4j if
                # adding the new ones would exceed the transfer budget.
                sub_total_number = 0
                for p in set(need_transfer):
                    if not self.transfer_record[p]:
                        sub_total_number += int(self.numbers[p])
                if self.total_neo4j_number + sub_total_number >= self.total_number * self.transfer_percent:
                    print("Because of the data in Neo4j is too much, now trying to delete some p")
                    for p in set(self.transfer_record.keys()):
                        if self.transfer_record[p]:
                            if self.reward[p][1][0] < 0:
                                print(p + " R10 < 0, be retrieved")
                                p_number = int(self.numbers[p])
                                self.transfer_record[p] = False
                                self.total_neo4j_number -= p_number
                            if self.reward[p][1][0] == 0:
                                if random.randint(1, 11) > 10 * self.prob:
                                    print(p + " R10=0, random choose to be retrieved")
                                    p_number = int(self.numbers[p])
                                    self.total_neo4j_number -= p_number
                                    self.transfer_record[p] = False
                    # Still over budget: evict the minimum-R10 predicate
                    # repeatedly until the new set fits.
                    while self.total_neo4j_number + sub_total_number >= self.total_number * self.transfer_percent:
                        min_r10_p = ""
                        min_r10 = 1e10
                        for p in set(self.transfer_record.keys()):
                            if self.transfer_record[p]:
                                if self.reward[p][1][0] < min_r10:
                                    min_r10_p = p
                                    min_r10 = self.reward[p][1][0]
                        if min_r10_p == '':
                            print("There is no p in Neo4j, directly transfer.")
                            break
                        print("Still ovev data threshold, delete" + min_r10_p + " because its R10 is the min:" + str(min_r10))
                        p_number = int(self.numbers[min_r10_p])
                        self.total_neo4j_number -= p_number
                        self.transfer_record[min_r10_p] = False

                # Re-add every predicate that survived eviction, so the
                # full import below rebuilds the complete Neo4j content.
                for p in self.transfer_record.keys():
                    if self.transfer_record[p]:
                        need_transfer.add(p)

                # delete original transfer record
                for p in self.transfer_record.keys():
                    self.transfer_record[p] = False
                print("need transfer: " + str(need_transfer))
                # Perform the actual data transfer into Neo4j.
                print("transfering")
                transfer_batch_time = self.transfer_neo4j_import_no_merge(set(need_transfer))
                print("transfer time:" + str(transfer_batch_time))
                for p in need_transfer:
                    self.transfer_record[p] = True
                print("transfer over")

                # Measure the post-transfer Neo4j time and update reward/Q.
                print("calculate q and reward")
                t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                t2.start()
                t2.join(self.max_time)
                if t2.is_alive():
                    stop_thread(t2)
                    print("Neo4j over time!")
                    neo4j_time = self.max_time
                else:
                    print("neo4j query success")
                    neo4j_time, result_list = self.this_time_neo, self.this_time_neo4j_results
                print("neo4j time:" + str(neo4j_time))
                self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate, last_time_record)
        total_end = time.perf_counter()  # end: Total Time
        # Aggregate the per-predicate Q table into four summary values.
        q_values = []
        q00 = 0
        q01 = 0
        q10 = 0
        q11 = 0
        for p in self.q.keys():
            q00 += self.q[p][0][0]
            q01 += self.q[p][0][1]
            q10 += self.q[p][1][0]
            q11 += self.q[p][1][1]
        q_values.append(q00)
        q_values.append(q01)
        q_values.append(q10)
        q_values.append(q11)
        print(self.batch_time_list)
        with open("record128.txt", "a", encoding="utf8") as fw:
            fw.write("Q Learning:" + str(self.batch_time_list) + "\n\n")
        return total_end - total_start, q_values, self.total_batch_time