def update_after_transfer(self, mysql, neo4j_time, sub_p_rate, last_record):
    """Re-run the sub-query on MySQL and fold the time saved into the reward table.

    After a sub-structure has been answered by Neo4j, the same sub-query is
    executed on MySQL (bounded by ``neo4j_times`` multiples of the Neo4j time).
    The improvement ``mysql_time - neo4j_time`` is distributed over the
    predicates by their rates and pushed into ``self.reward`` / ``self.update_q``.

    :param mysql: SQL text of the sub-query to benchmark on MySQL.
    :param neo4j_time: Neo4j execution time (numeric, or numeric string).
    :param sub_p_rate: dict predicate -> rate (fraction of the sub-query).
    :param last_record: predicates that were already in Neo4j before this
        transfer round; they credit reward[p][1][0], the rest reward[p][0][1].
    """
    print("neo4j query time: " + str(neo4j_time))
    # Fix: coerce ONCE. The original coerced only inside join() but then did
    # `neo4j_time * self.neo4j_times` raw, which breaks (string repetition /
    # TypeError) whenever neo4j_time arrives as a string, and the int()
    # truncation made the join timeout disagree with the charged mysql_time.
    gdb_time = float(neo4j_time)
    budget = gdb_time * float(self.neo4j_times)
    t = threading.Thread(target=self.rdb_query_time, args=(mysql,))
    t.start()
    try:
        t.join(budget)
    except RuntimeError:
        # join() can only raise RuntimeError on a mis-used thread; dump the
        # inputs for post-mortem and abort, as the original code did.
        print(self.neo4j_times)
        print(neo4j_time)
        exit(0)
    print("mysql is still running:" + str(t.is_alive()))
    if t.is_alive():
        # MySQL exceeded the allowed budget: kill it and charge the budget.
        stop_thread(t)
        mysql_time = budget
        print("stop mysql immadietely, time:" + str(mysql_time))
    else:
        # MySQL finished: rdb_query_time left its wall time in this_time_sql.
        mysql_time = self.this_time_sql
        print("mysql query successfully, time:" + str(mysql_time))
    improvement = mysql_time - gdb_time
    for p in sub_p_rate:
        if p in last_record:
            # p was already resident in Neo4j -> credit the "stay" action.
            self.reward[p][1][0] += improvement * sub_p_rate[p]
            self.update_q(1, 0, p)
        else:
            # p was freshly transferred -> credit the "transfer" action.
            self.reward[p][0][1] += improvement * sub_p_rate[p]
            self.update_q(0, 1, p)
    # Reset so a stale MySQL time is never attributed to the next query.
    self.this_time_sql = 0
def stop_polling(self):
    """Stop the polling loop immediately.

    :return: None
    """
    # An active poll: flip the button label back to "start polling",
    # re-enable editing, log the shutdown and release the serial port.
    if self.isPolling:
        self.ui.btn_submit.setText("开始轮询")
        self.set_enable(True)
        self.isPolling = False
        self.append_info("结束轮询")
        self.close_serial()
    # Force-kill the polling worker thread if it is still running.
    worker = self.polling_timer
    if worker is not None and worker.is_alive():
        util.stop_thread(worker)
def lru(self):
    """Run the workload with an LRU transfer policy.

    Online phase: route each query to Neo4j (if every predicate of its
    sub-structure is resident) or MySQL, touching ``self.lru_list`` in LRU
    order. Offline phase: after each batch, evict least-recently-used
    predicates until the transfer fits under the capacity threshold, then
    re-import the needed set into Neo4j.

    :return: (total wall time, [], total batch time, overtime query numbers,
        batch time with overtime stripped, per-batch time list)
    """
    self.total_batch_time = 0
    self.batch_time_list = []
    file_name, file_cypher_name, random_file_name = self.get_file_paths()
    # NOTE(review): these two handles are never closed — consider `with`.
    f_mysql = open(file_name, 'r', encoding='utf8')
    f_cypher = open(file_cypher_name, 'r', encoding='utf8')
    mysql_queries = f_mysql.readlines()
    cypher_queries = f_cypher.readlines()
    query_order = get_query_order(random_file_name, len(mysql_queries), self.query_order)
    # "wufenzhiyi" = "one fifth": the workload is split into 5 batches.
    wufenzhiyi = len(query_order) / 5
    total_start = time.perf_counter()
    for batch_number in range(5):
        start_batch_number = int(batch_number * wufenzhiyi)
        end_batch_number = int((batch_number + 1) * wufenzhiyi)
        # NOTE(review): unreachable under range(5) — leftover from a
        # 10-batch variant of this loop.
        if batch_number == 9:
            end_batch_number = len(mysql_queries) - 1
        batch_p_list = set()  # predicates seen this batch that are not yet transferred
        batch_start = time.perf_counter()
        total_batch_start = time.perf_counter()
        for query_number in query_order[start_batch_number:end_batch_number]:
            print("===================================================")
            print("start query online:" + str(query_number))
            fail_start = time.perf_counter()
            # First, detect whether the query has a transferable sub-structure.
            flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                find_oname(mysql_queries[query_number], cypher_queries[query_number])
            print(new_mysql)
            print(new_cypher)
            print(other_mysql)
            print(length)
            # No sub-structure: run the whole query on MySQL with a timeout.
            if not flag:
                print("There is no sub query struct, start MySQL Query instead")
                t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number],))
                t.start()
                block_time = self.max_time
                t.join(block_time)
                if t.is_alive():
                    # Timed out: kill the worker and charge max_time.
                    stop_thread(t)
                    mysql_time = block_time
                    print("Stop the mysql query immediately, time:" + str(mysql_time))
                    # self.mysql_query_total_time += mysql_time
                    fail_end = time.perf_counter()
                    self.overtime_query[query_number] = fail_end - fail_start
                    self.rdb_batch_time[batch_number] += self.max_time
                else:
                    mysql_time = self.this_time_sql
                    print("mysql query successfully , time:" + str(mysql_time))
                    self.mysql_query_total_time += mysql_time
                    self.rdb_batch_time[batch_number] += mysql_time
                # Record elapsed time since the start of the whole run.
                breakdown_time = time.perf_counter()
                self.breakdown[query_number] = breakdown_time - total_start
                print("immediately mysql query over, start next query")
                continue
            else:
                print("detected the sub query struct")
                # Rate (share) of each predicate within the sub-structure.
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rates: " + str(sub_p_rate))
                for p in sub_p_list:
                    # Collect predicates of this sub-structure not yet transferred.
                    if not self.transfer_record[p]:
                        batch_p_list.add(p)
                # Check whether every predicate is resident in Neo4j.
                transfer_flag = False
                for p in sub_p_list:
                    if not p in self.lru_list:
                        # Any missing predicate sets the flag.
                        transfer_flag = True
                self.show_transferred_p()
                # All predicates already transferred -> query via Neo4j.
                if not transfer_flag:
                    # LRU touch: move each used predicate to the MRU end.
                    for p in sub_p_list:
                        self.lru_list.remove(p)
                        self.lru_list.append(p)
                    print("All p in sub query is transferred, use Neo4j to query")
                    t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                    t2.start()
                    t2.join(100)
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!")
                        self.gdb_batch_time[batch_number] += 100
                        continue
                    else:
                        self.gdb_batch_time[batch_number] += self.this_time_neo
                        print("neo4j query success")
                        neo4j_time, result_list = self.this_time_neo, self.this_time_results
                        # self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate)
                        # Feed Neo4j results into the remaining MySQL part.
                        if len(result_list) != 0:
                            t1 = threading.Thread(target=self.rdb_second_query, args=(other_mysql, result_list))
                            t1.start()
                            t1.join(self.max_time)
                            if t1.is_alive():
                                stop_thread(t1)
                                print("Final mysql query overtime:" + str(self.max_time))
                                fail_end = time.perf_counter()
                                self.overtime_query[query_number] = fail_end - fail_start
                                self.rdb_batch_time[batch_number] += self.max_time
                            else:
                                self.rdb_batch_time[batch_number] += self.query_time
                                print("Final query run successfully:" + str(self.query_time))
                        else:
                            print("Because of the sub query result length is 0, so jump over the other query")
                    continue
                # At least one predicate missing -> run directly on MySQL.
                else:
                    print("have some p not in neo4j, choose to use mysql")
                    # LRU touch only for predicates that are resident.
                    for p in sub_p_list:
                        if p in self.lru_list:
                            self.lru_list.remove(p)
                            self.lru_list.append(p)
                    t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number],))
                    t.start()
                    block_time = self.max_time
                    t.join(block_time)
                    print("mysql is still running:" + str(t.is_alive()))
                    if t.is_alive():
                        stop_thread(t)
                        mysql_time = block_time
                        print("force stop mysql, time:" + str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                        fail_end = time.perf_counter()
                        self.overtime_query[query_number] = fail_end - fail_start
                        self.rdb_batch_time[batch_number] += self.max_time
                    else:
                        mysql_time = self.this_time_sql
                        self.rdb_batch_time[batch_number] += mysql_time
                        print("mysql query successfully, time:" + str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                    breakdown_time = time.perf_counter()
                    self.breakdown[query_number] = breakdown_time - total_start
                    continue
        batch_end = time.perf_counter()
        self.total_batch_time += batch_end - batch_start
        self.batch_time_list.append(batch_end - batch_start)
        print("The No." + str(batch_number) + " batch query Online over, time :" + str(batch_end - batch_start))
        # ======================================================================
        # Batch finished: start the Offline (transfer) phase.
        print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        print("Start process No." + str(batch_number) + "batch transferring")
        offline_training_time = time.perf_counter()
        for query_number in query_order[start_batch_number:end_batch_number]:
            print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
            print("start process number: " + str(query_number))
            # if query_number < 10 or query_number > 15:
            #     continue
            # Detect the sub-structure again for the offline decision.
            flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                find_oname(mysql_queries[query_number], cypher_queries[query_number])
            if not flag:
                # No sub-structure, nothing to transfer.
                continue
            # Check whether every sub-query predicate is in the graph DB.
            transfer_flag = False
            for p in sub_p_list:
                if p not in self.lru_list:
                    # Any missing predicate sets the flag.
                    transfer_flag = True
            sub_p_rate = count_rate(sub_p_list)
            print("Get the p rate of the sub query" + str(sub_p_rate))
            if not transfer_flag:
                # Everything is already transferred: just refresh LRU order.
                for p in sub_p_list:
                    self.lru_list.remove(p)
                    self.lru_list.append(p)
                continue
            # Candidate set to transfer (all predicates of this sub-query).
            need_transfer = set()
            for p in sub_p_list:
                need_transfer.add(p)
            # Count rows only for predicates not already resident.
            sub_total_number = 0
            for p in set(need_transfer):
                if p not in self.lru_list:
                    sub_total_number += int(self.numbers[p])
            # Evict from the LRU end until the transfer fits under the cap.
            # NOTE(review): loops forever if lru_list empties before the
            # condition clears — confirm capacity invariants upstream.
            while self.total_neo4j_number + sub_total_number >= self.total_number * self.transfer_percent:
                print("Because of the data in Neo4j is too much, now trying to delete some p")
                need_retrieve_p = self.lru_list[0]
                p_number = int(self.numbers[need_retrieve_p])
                self.total_neo4j_number -= p_number
                self.lru_list.remove(need_retrieve_p)
            # Re-import the survivors together with the new predicates.
            for p in self.lru_list:
                need_transfer.add(p)
            # delete original transfer record
            self.lru_list.clear()
            print("need transfer: " + str(need_transfer))
            # Perform the transfer.
            print("transfering")
            transfer_batch_time = transfer_neo4j_import(set(need_transfer), self.flag)
            self.transfer_batch_time[batch_number] += transfer_batch_time
            for p in need_transfer:
                self.lru_list.append(p)
            self.transfer_total_time += transfer_batch_time
            print("transfer over")
        offline_training_time_end = time.perf_counter()
        # Training time = offline wall time minus the pure transfer time.
        self.training_batch_time[batch_number] += offline_training_time_end - offline_training_time - \
            self.transfer_batch_time[batch_number]
        total_batch_end = time.perf_counter()
        self.offandon_batch_time[batch_number] += total_batch_end - total_batch_start
        # self.show_matrix()
        print('\n\n\n')
    total_end = time.perf_counter()
    # Strip the time spent on overtime queries from the batch total.
    expect_failed = self.total_batch_time
    overtime_list = []
    for number in self.overtime_query.keys():
        overtime_list.append(number)
        expect_failed -= self.overtime_query[number]
    print("over time queries:" + str(overtime_list))
    print("total time after strip:" + str(expect_failed))
    print(self.batch_time_list)
    self.save()
    return total_end - total_start, [], self.total_batch_time, overtime_list, expect_failed, self.batch_time_list
def random_policy(self):
    """Run the workload with a random transfer policy (baseline).

    Online phase: route each query to Neo4j (if all predicates of its
    sub-structure are transferred) or MySQL. Offline phase: after each
    batch, clear the transfer record and re-transfer the predicates of a
    random sample of earlier queries, up to the capacity threshold.

    :return: (total wall time, summed Q-values [q00, q01, q10, q11],
        total batch time)
    """
    mysql_queries, cypher_queries, query_order = self._q_learning_init()
    batch_number = int(self.cf.get("rdb-gdb", "batch_number"))
    one_batch_length = len(query_order) / batch_number
    total_start = time.perf_counter()  # start: Total Time
    # NOTE(review): the loop variable shadows the configured batch count.
    for batch_number in range(batch_number):
        start_batch_number = int(batch_number * one_batch_length)
        end_batch_number = int((batch_number + 1) * one_batch_length)
        # Last batch of a 10-batch run absorbs the remainder.
        if batch_number == 9:
            end_batch_number = len(mysql_queries) - 1
        batch_p_list = set()  # predicates seen this batch, not yet transferred
        batch_start = time.perf_counter()  # start: Batch Time
        for query_number in query_order[start_batch_number:end_batch_number]:
            print("===================================================")
            print("start query online:" + str(query_number))
            # First, detect whether the query has a transferable sub-structure.
            flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                find_oname(mysql_queries[query_number], cypher_queries[query_number])
            # All predicates must be known to the transfer record.
            p_exist_flag = True
            if flag:
                for p in sub_p_list:
                    if p not in self.transfer_record:
                        p_exist_flag = False
            # No usable sub-structure: whole query goes to MySQL with timeout.
            if not flag or not p_exist_flag:
                print("There is no sub query struct, start MySQL Query instead")
                t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number], ))
                t.start()
                t.join(self.max_time)
                if t.is_alive():
                    stop_thread(t)
                    mysql_time = self.max_time
                    print("Stop the mysql query immediately, time:" + str(mysql_time))
                else:
                    mysql_time = self.this_time_sql
                    print("mysql query successfully , time:" + str(mysql_time))
                print("immediately mysql query over, start next query")
                continue
            else:
                print("detected the sub query struct")
                print("Sub sql query: " + new_mysql)
                print("Sub cypher query: " + new_cypher)
                print("Other query: " + other_mysql)
                print("Length: " + str(length))
                # Rate (share) of each predicate within the sub-structure.
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rates: " + str(sub_p_rate))
                for p in sub_p_list:
                    # Collect predicates of this sub-structure not yet transferred.
                    if not self.transfer_record[p]:
                        batch_p_list.add(p)
                # Check whether every predicate is resident in Neo4j.
                transfer_flag = False
                for p in sub_p_list:
                    if not self.transfer_record[p]:
                        # Any missing predicate sets the flag.
                        transfer_flag = True
                self.show_transferred_p()
                # All predicates already transferred -> query via Neo4j.
                if not transfer_flag:
                    print("All p in sub query is transferred, use Neo4j to query")
                    t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                    t2.start()
                    t2.join(100)
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!")
                        continue
                    else:
                        print("Neo4j query success")
                        neo4j_time, result_list = self.this_time_neo, self.this_time_neo4j_results
                        # self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate)
                        # Feed Neo4j results into the remaining MySQL part.
                        if len(result_list) != 0:
                            t1 = threading.Thread(target=self.rdb_second_query, args=(other_mysql, result_list))
                            t1.start()
                            t1.join(self.max_time)
                            if t1.is_alive():
                                stop_thread(t1)
                                print("Final mysql query overtime:" + str(self.max_time))
                            else:
                                print("Final query run successfully:" + str(self.second_query_time))
                        else:
                            print("Because of the sub query result length is 0, so jump over the other query")
                    continue
                # At least one predicate missing -> run directly on MySQL.
                else:
                    print("Have some p not in neo4j, choose to use mysql")
                    t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number], ))
                    t.start()
                    t.join(self.max_time)
                    print("Mysql is still running:" + str(t.is_alive()))
                    if t.is_alive():
                        stop_thread(t)
                        print("Force stop mysql, time:" + str(self.max_time))
                    else:
                        print("Mysql query successfully, time:" + str(self.this_time_sql))
                    continue
        batch_end = time.perf_counter()  # end: Batch Time
        self.total_batch_time += batch_end - batch_start
        self.batch_time_list.append(batch_end - batch_start)
        print("The No." + str(batch_number) + " batch query Online over, time :" + str(batch_end - batch_start))
        self.run_batches += 1
        # Experiment cap: stop after six batches have been run.
        if self.run_batches == 6:
            break
        # ======================================================================
        # Batch finished: start the Offline (transfer) phase.
        print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        print("Start process No." + str(batch_number) + "batch transferring")
        # Random transfer policy: after each batch, randomly re-transfer the
        # predicates of a sample of queries from all earlier batches.
        former_sql = mysql_queries[:end_batch_number]
        former_cypher = cypher_queries[:end_batch_number]
        need_transfer_p_set = set()
        random_choose = random.sample(range(len(former_sql)), end_batch_number - start_batch_number)
        # Reset the whole transfer record before re-selecting.
        for p in self.transfer_record:
            self.transfer_record[p] = False
        self.total_neo4j_number = 0
        # Add predicates of the sampled queries until capacity is reached.
        full_flag = False
        for num in random_choose:
            flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                find_oname(former_sql[num], former_cypher[num])
            print(flag)
            print(sub_p_list)
            if flag:
                for p in sub_p_list:
                    if not full_flag and p in self.transfer_record:
                        need_transfer_p_set.add(p)
                        self.total_neo4j_number += int(self.numbers[p])
                        self.transfer_record[p] = True
                    else:
                        break
                    # Mark full once the next addition would exceed the cap.
                    if int(self.numbers[p]) + self.total_neo4j_number >= self.total_number * self.transfer_percent:
                        full_flag = True
            if full_flag:
                break
        print(random_choose)
        print(need_transfer_p_set)
        if len(need_transfer_p_set) != 0:
            self.transfer_neo4j_import_no_merge(need_transfer_p_set)
    total_end = time.perf_counter()  # end: Total Time
    # Sum Q-values over all predicates, per (state, action) cell.
    q_values = []
    q00 = 0
    q01 = 0
    q10 = 0
    q11 = 0
    for p in self.q.keys():
        q00 += self.q[p][0][0]
        q01 += self.q[p][0][1]
        q10 += self.q[p][1][0]
        q11 += self.q[p][1][1]
    q_values.append(q00)
    q_values.append(q01)
    q_values.append(q10)
    q_values.append(q11)
    print(self.batch_time_list)
    with open("record128.txt", "a", encoding="utf8") as fw:
        fw.write("Q Learning:" + str(self.batch_time_list) + "\n\n")
    return total_end - total_start, q_values, self.total_batch_time
def foresee(self):
    """Run the workload with a "foresee" (oracle-like) transfer policy.

    Before each batch's online phase, peek at the batch's queries, find the
    most frequent sub-structure predicate list, and transfer exactly that
    set to Neo4j. Then route each query to Neo4j or MySQL as usual.

    :return: (total wall time, summed Q-values, total batch time, overtime
        query numbers, batch time with overtime stripped, per-batch times)
    """
    self.batch_time_list = []
    file_name, file_cypher_name, random_file_name = self.get_file_paths()
    # NOTE(review): these two handles are never closed — consider `with`.
    f_mysql = open(file_name, 'r', encoding='utf8')
    f_cypher = open(file_cypher_name, 'r', encoding='utf8')
    mysql_queries = f_mysql.readlines()
    cypher_queries = f_cypher.readlines()
    query_order = get_query_order(random_file_name, len(mysql_queries), self.query_order)
    # "wufenzhiyi" = "one fifth": the workload is split into 5 batches.
    wufenzhiyi = len(query_order) / 5
    total_start = time.perf_counter()
    for batch_number in range(5):
        start_batch_number = int(batch_number * wufenzhiyi)
        end_batch_number = int((batch_number + 1) * wufenzhiyi)
        # NOTE(review): unreachable under range(5) — leftover from a
        # 10-batch variant of this loop.
        if batch_number == 9:
            end_batch_number = len(mysql_queries) - 1
        batch_p_list = set()  # predicates seen this batch, not yet transferred
        total_batch_start = time.perf_counter()
        print("+++++++++++++++++++++++++foresee+++++++++++++++++++++++")
        # Foresee step: histogram the batch's sub-structure predicate lists.
        foresee_dict = {}         # hash(sub_p_list) -> occurrence count
        foresee_number_dict = {}  # hash(sub_p_list) -> the predicate list
        for p in self.transfer_record.keys():
            self.transfer_record[p] = False
        for query_number in query_order[start_batch_number:end_batch_number]:
            flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                find_oname(mysql_queries[query_number], cypher_queries[query_number])
            if not flag:
                continue
            list_hash = hash(tuple(sub_p_list))
            if list_hash not in foresee_number_dict:
                foresee_number_dict[list_hash] = sub_p_list
            if list_hash in foresee_dict:
                foresee_dict[list_hash] += 1
            else:
                foresee_dict[list_hash] = 0
        print("Foresee number list:" + str(foresee_number_dict))
        print("Foresee Dict:" + str(foresee_dict))
        # Transfer the most frequent predicate list of this batch.
        for key, value in foresee_dict.items():
            if value == max(foresee_dict.values()):
                print("Foresee p list:" + str(key))
                for p in foresee_number_dict[key]:
                    self.transfer_record[p] = True
                transfer_batch_time = transfer_neo4j_import(set(foresee_number_dict[key]), self.flag)
                break
        batch_start = time.perf_counter()
        for query_number in query_order[start_batch_number:end_batch_number]:
            print("===================================================")
            print("start query online:" + str(query_number))
            fail_start = time.perf_counter()
            # First, detect whether the query has a transferable sub-structure.
            flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                find_oname(mysql_queries[query_number], cypher_queries[query_number])
            print(new_mysql)
            print(new_cypher)
            print(other_mysql)
            print(length)
            # No sub-structure: run the whole query on MySQL with a timeout.
            if not flag:
                print("There is no sub query struct, start MySQL Query instead")
                t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number], ))
                t.start()
                block_time = self.max_time
                t.join(block_time)
                if t.is_alive():
                    stop_thread(t)
                    mysql_time = block_time
                    print("Stop the mysql query immediately, time:" + str(mysql_time))
                    # self.mysql_query_total_time += mysql_time
                    fail_end = time.perf_counter()
                    self.overtime_query[query_number] = fail_end - fail_start
                    self.rdb_batch_time[batch_number] += self.max_time
                else:
                    mysql_time = self.this_time_sql
                    print("mysql query successfully , time:" + str(mysql_time))
                    self.mysql_query_total_time += mysql_time
                    self.rdb_batch_time[batch_number] += mysql_time
                # Record elapsed time since the start of the whole run.
                breakdown_time = time.perf_counter()
                self.breakdown[query_number] = breakdown_time - total_start
                print("immediately mysql query over, start next query")
                continue
            else:
                print("detected the sub query struct")
                # Rate (share) of each predicate within the sub-structure.
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rates: " + str(sub_p_rate))
                for p in sub_p_list:
                    # Collect predicates of this sub-structure not yet transferred.
                    if not self.transfer_record[p]:
                        batch_p_list.add(p)
                # Check whether every predicate is resident in Neo4j.
                transfer_flag = False
                for p in sub_p_list:
                    if not self.transfer_record[p]:
                        # Any missing predicate sets the flag.
                        transfer_flag = True
                self.show_transferred_p()
                # All predicates already transferred -> query via Neo4j.
                if not transfer_flag:
                    print("All p in sub query is transferred, use Neo4j to query")
                    t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                    t2.start()
                    t2.join(100)
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!")
                        self.gdb_batch_time[batch_number] += 100
                        continue
                    else:
                        self.gdb_batch_time[batch_number] += self.this_time_neo
                        print("neo4j query success")
                        neo4j_time, result_list = self.this_time_neo, self.this_time_results
                        # self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate)
                        # Feed Neo4j results into the remaining MySQL part.
                        if len(result_list) != 0:
                            t1 = threading.Thread(target=self.rdb_second_query, args=(other_mysql, result_list))
                            t1.start()
                            t1.join(self.max_time)
                            if t1.is_alive():
                                stop_thread(t1)
                                print("Final mysql query overtime:" + str(self.max_time))
                                fail_end = time.perf_counter()
                                self.overtime_query[query_number] = fail_end - fail_start
                                self.rdb_batch_time[batch_number] += self.max_time
                            else:
                                self.rdb_batch_time[batch_number] += self.query_time
                                print("Final query run successfully:" + str(self.query_time))
                        else:
                            print("Because of the sub query result length is 0, so jump over the other query")
                    continue
                # At least one predicate missing -> run directly on MySQL.
                else:
                    print("have some p not in neo4j, choose to use mysql")
                    t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number], ))
                    t.start()
                    block_time = self.max_time
                    t.join(block_time)
                    print("mysql is still running:" + str(t.is_alive()))
                    if t.is_alive():
                        stop_thread(t)
                        mysql_time = block_time
                        print("force stop mysql, time:" + str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                        fail_end = time.perf_counter()
                        self.overtime_query[query_number] = fail_end - fail_start
                        self.rdb_batch_time[batch_number] += self.max_time
                    else:
                        mysql_time = self.this_time_sql
                        self.rdb_batch_time[batch_number] += mysql_time
                        print("mysql query successfully, time:" + str(mysql_time))
                        self.mysql_query_total_time += mysql_time
                    breakdown_time = time.perf_counter()
                    self.breakdown[query_number] = breakdown_time - total_start
                    continue
        batch_end = time.perf_counter()
        self.total_batch_time += batch_end - batch_start
        self.batch_time_list.append(batch_end - batch_start)
        print("The No." + str(batch_number) + " batch query Online over, time :" + str(batch_end - batch_start))
        total_batch_end = time.perf_counter()
        self.offandon_batch_time[batch_number] += total_batch_end - total_batch_start
        # self.show_matrix()
        print('\n\n\n')
    total_end = time.perf_counter()
    # self.db.close()
    # Sum Q-values over all predicates, per (state, action) cell.
    q_values = []
    q00 = 0
    q01 = 0
    q10 = 0
    q11 = 0
    for p in self.q.keys():
        q00 += self.q[p][0][0]
        q01 += self.q[p][0][1]
        q10 += self.q[p][1][0]
        q11 += self.q[p][1][1]
    q_values.append(q00)
    q_values.append(q01)
    q_values.append(q10)
    q_values.append(q11)
    # Strip the time spent on overtime queries from the batch total.
    expect_failed = self.total_batch_time
    overtime_list = []
    for number in self.overtime_query.keys():
        overtime_list.append(number)
        expect_failed -= self.overtime_query[number]
    print("over time queries:" + str(overtime_list))
    print("total time after strip:" + str(expect_failed))
    print(self.batch_time_list)
    self.save()
    print("RDB time: " + str(self.rdb_batch_time))
    print("GDB time: " + str(self.gdb_batch_time))
    print("Transfer time: " + str(self.transfer_batch_time))
    print("Training time: " + str(self.training_batch_time))
    print("Offline + Online Batch time: " + str(self.offandon_batch_time))
    # Reset the per-batch accounting for the next run.
    self.rdb_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    self.gdb_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    self.transfer_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    self.training_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    self.offandon_batch_time = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    return total_end - total_start, q_values, self.total_batch_time, overtime_list, expect_failed, self.batch_time_list
def q_learning(self):
    """Run the workload with the Q-learning transfer policy.

    Online phase: route each query to Neo4j (if every predicate of its
    sub-structure is transferred) or MySQL. Offline phase: for each query of
    the finished batch, decide by accumulated reward whether to transfer its
    predicates, evict low-reward predicates when over capacity, then re-query
    and update rewards/Q via ``update_after_transfer``.

    :return: (total wall time, summed Q-values [q00, q01, q10, q11],
        total batch time)
    """
    mysql_queries, cypher_queries, query_order = self._q_learning_init()
    batch_number = int(self.cf.get("rdb-gdb", "batch_number"))
    one_batch_length = len(query_order) / batch_number
    total_start = time.perf_counter()  # start: Total Time
    # NOTE(review): the loop variable shadows the configured batch count.
    for batch_number in range(batch_number):
        start_batch_number = int(batch_number * one_batch_length)
        end_batch_number = int((batch_number + 1) * one_batch_length)
        # Last batch of a 10-batch run absorbs the remainder.
        if batch_number == 9:
            end_batch_number = len(mysql_queries) - 1
        batch_p_list = set()  # predicates seen this batch, not yet transferred
        batch_start = time.perf_counter()  # start: Batch Time
        for query_number in query_order[start_batch_number:end_batch_number]:
            print("===================================================")
            print("start query online:" + str(query_number))
            # First, detect whether the query has a transferable sub-structure.
            flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                find_oname(mysql_queries[query_number], cypher_queries[query_number])
            # All predicates must be known to the transfer record.
            p_exist_flag = True
            if flag:
                for p in sub_p_list:
                    if p not in self.transfer_record:
                        p_exist_flag = False
            # No usable sub-structure: whole query goes to MySQL with timeout.
            if not flag or not p_exist_flag:
                print("There is no sub query struct, start MySQL Query instead")
                t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number],))
                t.start()
                t.join(self.max_time)
                if t.is_alive():
                    stop_thread(t)
                    mysql_time = self.max_time
                    print("Stop the mysql query immediately, time:" + str(mysql_time))
                else:
                    mysql_time = self.this_time_sql
                    print("mysql query successfully , time:" + str(mysql_time))
                print("immediately mysql query over, start next query")
                continue
            else:
                print("detected the sub query struct")
                print("Sub sql query: " + new_mysql)
                print("Sub cypher query: " + new_cypher)
                print("Other query: " + other_mysql)
                print("Length: " + str(length))
                # Rate (share) of each predicate within the sub-structure.
                sub_p_rate = count_rate(sub_p_list)
                print("Get the p rates: " + str(sub_p_rate))
                for p in sub_p_list:
                    # Collect predicates of this sub-structure not yet transferred.
                    if not self.transfer_record[p]:
                        batch_p_list.add(p)
                # Check whether every predicate is resident in Neo4j.
                transfer_flag = False
                for p in sub_p_list:
                    if not self.transfer_record[p]:
                        # Any missing predicate sets the flag.
                        transfer_flag = True
                self.show_transferred_p()
                # All predicates already transferred -> query via Neo4j.
                if not transfer_flag:
                    print("All p in sub query is transferred, use Neo4j to query")
                    t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                    t2.start()
                    t2.join(100)
                    if t2.is_alive():
                        stop_thread(t2)
                        print("Neo4j over time!")
                        continue
                    else:
                        print("Neo4j query success")
                        neo4j_time, result_list = self.this_time_neo, self.this_time_neo4j_results
                        # self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate)
                        # Feed Neo4j results into the remaining MySQL part.
                        if len(result_list) != 0:
                            t1 = threading.Thread(target=self.rdb_second_query, args=(other_mysql, result_list))
                            t1.start()
                            t1.join(self.max_time)
                            if t1.is_alive():
                                stop_thread(t1)
                                print("Final mysql query overtime:" + str(self.max_time))
                            else:
                                print("Final query run successfully:" + str(self.second_query_time))
                        else:
                            print("Because of the sub query result length is 0, so jump over the other query")
                    continue
                # At least one predicate missing -> run directly on MySQL.
                else:
                    print("Have some p not in neo4j, choose to use mysql")
                    t = threading.Thread(target=self.rdb_query_time, args=(mysql_queries[query_number],))
                    t.start()
                    t.join(self.max_time)
                    print("Mysql is still running:" + str(t.is_alive()))
                    if t.is_alive():
                        stop_thread(t)
                        print("Force stop mysql, time:" + str(self.max_time))
                    else:
                        print("Mysql query successfully, time:" + str(self.this_time_sql))
                    continue
        batch_end = time.perf_counter()  # end: Batch Time
        self.total_batch_time += batch_end - batch_start
        self.batch_time_list.append(batch_end - batch_start)
        print("The No." + str(batch_number) + " batch query Online over, time :" + str(batch_end - batch_start))
        # ======================================================================
        # Batch finished: start the Offline (transfer + learning) phase.
        print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        print("Start process No." + str(batch_number) + "batch transferring")
        for query_number in query_order[start_batch_number:end_batch_number]:
            print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
            print("Start process number: " + str(query_number))
            # Detect the sub-structure again for the offline decision.
            flag, sub_p_list, new_mysql, new_cypher, other_mysql, length = \
                find_oname(mysql_queries[query_number], cypher_queries[query_number])
            p_exist_flag = True
            if flag:
                for p in sub_p_list:
                    if p not in self.transfer_record:
                        p_exist_flag = False
            # Skip queries with no sub-structure or unknown predicates.
            if not flag or not p_exist_flag:
                continue
            # Snapshot of predicates resident in Neo4j BEFORE this decision;
            # update_after_transfer uses it to pick the reward cell.
            last_time_record = []
            for p in self.transfer_record.keys():
                if self.transfer_record[p]:
                    last_time_record.append(p)
            print("New SQL:" + new_mysql)
            print("New Cypher:" + new_cypher)
            print("Other SQL:" + other_mysql)
            # Check whether every sub-query predicate is in the graph DB.
            transfer_flag = False
            for p in sub_p_list:
                if not self.transfer_record[p]:
                    # Any missing predicate sets the flag.
                    transfer_flag = True
            sub_p_rate = count_rate(sub_p_list)
            print("Get the p rate of the sub query" + str(sub_p_rate))
            if not transfer_flag:
                # Everything already resident: query Neo4j and update rewards.
                print("All p in sub query are in Neo4j")
                t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
                t2.start()
                t2.join(100)
                if t2.is_alive():
                    stop_thread(t2)
                    print("Neo4j over time!" + str(self.max_time))
                    continue
                else:
                    print("Neo4j query success")
                    neo4j_time, result_list = self.this_time_neo, self.this_time_neo4j_results
                    self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate, last_time_record)
                continue
            # Decide by Q-learning whether to transfer.
            need_transfer = set()
            # Accumulated "transfer" reward over the sub-query's predicates.
            total_reward = 0
            for p in sub_p_list:
                total_reward += self.reward[p][0][1]
            print("Total reward: " + str(total_reward))
            if total_reward > 0:
                print("Total reward > 0")
                for p in sub_p_list:
                    need_transfer.add(p)
            elif total_reward == 0:
                # Zero reward: explore with probability self.prob.
                if random.randint(1, 101) < 100 * self.prob:
                    print("Total reward = 0, random choose to transfer")
                    for p in sub_p_list:
                        need_transfer.add(p)
                else:
                    print("Total reward = 0, random choose to continue")
            else:
                print("Total reward < 0, choose to continue")
            if len(need_transfer) == 0:
                print("No p to transfer, continue")
                continue
            # Count rows only for predicates not already resident.
            sub_total_number = 0
            for p in set(need_transfer):
                if not self.transfer_record[p]:
                    sub_total_number += int(self.numbers[p])
            # Over capacity: retrieve (evict) low-reward resident predicates.
            if self.total_neo4j_number + sub_total_number >= self.total_number * self.transfer_percent:
                print("Because of the data in Neo4j is too much, now trying to delete some p")
                for p in set(self.transfer_record.keys()):
                    if self.transfer_record[p]:
                        # Negative "stay" reward -> always retrieve.
                        if self.reward[p][1][0] < 0:
                            print(p + " R10 < 0, be retrieved")
                            p_number = int(self.numbers[p])
                            self.transfer_record[p] = False
                            self.total_neo4j_number -= p_number
                        # Zero "stay" reward -> retrieve with some probability.
                        if self.reward[p][1][0] == 0:
                            if random.randint(1, 11) > 10 * self.prob:
                                print(p + " R10=0, random choose to be retrieved")
                                p_number = int(self.numbers[p])
                                self.total_neo4j_number -= p_number
                                self.transfer_record[p] = False
                # Still over the cap: evict the minimum-R10 predicate each round.
                while self.total_neo4j_number + sub_total_number >= self.total_number * self.transfer_percent:
                    min_r10_p = ""
                    min_r10 = 1e10
                    for p in set(self.transfer_record.keys()):
                        if self.transfer_record[p]:
                            if self.reward[p][1][0] < min_r10:
                                min_r10_p = p
                                min_r10 = self.reward[p][1][0]
                    if min_r10_p == '':
                        # Nothing left to evict; proceed with the transfer.
                        print("There is no p in Neo4j, directly transfer.")
                        break
                    print("Still ovev data threshold, delete" + min_r10_p + " because its R10 is the min:" + str(min_r10))
                    p_number = int(self.numbers[min_r10_p])
                    self.total_neo4j_number -= p_number
                    self.transfer_record[min_r10_p] = False
            # Re-import the surviving resident predicates together with the new ones.
            for p in self.transfer_record.keys():
                if self.transfer_record[p]:
                    need_transfer.add(p)
            # delete original transfer record
            for p in self.transfer_record.keys():
                self.transfer_record[p] = False
            print("need transfer: " + str(need_transfer))
            # Perform the transfer.
            print("transfering")
            transfer_batch_time = self.transfer_neo4j_import_no_merge(set(need_transfer))
            print("transfer time:" + str(transfer_batch_time))
            for p in need_transfer:
                self.transfer_record[p] = True
            print("transfer over")
            # Compute reward and update the Q table for this decision.
            print("calculate q and reward")
            t2 = threading.Thread(target=self.neo_query_time, args=(new_cypher, length))
            t2.start()
            t2.join(self.max_time)
            if t2.is_alive():
                stop_thread(t2)
                print("Neo4j over time!")
                neo4j_time = self.max_time
            else:
                print("neo4j query success")
                neo4j_time, result_list = self.this_time_neo, self.this_time_neo4j_results
            print("neo4j time:" + str(neo4j_time))
            self.update_after_transfer(new_mysql, neo4j_time, sub_p_rate, last_time_record)
    total_end = time.perf_counter()  # end: Total Time
    # Sum Q-values over all predicates, per (state, action) cell.
    q_values = []
    q00 = 0
    q01 = 0
    q10 = 0
    q11 = 0
    for p in self.q.keys():
        q00 += self.q[p][0][0]
        q01 += self.q[p][0][1]
        q10 += self.q[p][1][0]
        q11 += self.q[p][1][1]
    q_values.append(q00)
    q_values.append(q01)
    q_values.append(q10)
    q_values.append(q11)
    print(self.batch_time_list)
    with open("record128.txt", "a", encoding="utf8") as fw:
        fw.write("Q Learning:" + str(self.batch_time_list) + "\n\n")
    return total_end - total_start, q_values, self.total_batch_time