def user_confirm(self):
    """Show the planned archive operation and ask whether to proceed.

    Prints the source server host together with the INSERT ... SELECT
    statement describing the migration. Returns True straight away in
    dry-run mode or when interactive confirmation is switched off;
    otherwise asks the user to type yes or no.

    :return: False only when the user answers "no", True in every other case.
    """
    plan_template = """ 将生成在服务器{mysql_host}上归档数据 迁移数据条件为: INSERT INTO `{target_database_name}`.`{target_table_name}` SELECT * FROM `{source_database_name}`.`{source_table_name}` WHERE {data_condition} """
    plan_text = plan_template.format(
        mysql_host=self.source_mysql_server.mysql_host,
        source_database_name=self.source_database_name,
        source_table_name=self.source_table_name,
        target_database_name=self.target_database_name,
        target_table_name=self.target_table_name,
        data_condition=self.data_condition,
    )
    PrintHelper.print_info_message(plan_text)

    if self.is_dry_run:
        PrintHelper.print_info_message("模拟运行。。。")
        return True
    if not self.is_user_confirm:
        return True

    prompt = """ 请输入yes继续或输入no退出,yes/no? """
    answer = self.get_user_choose_option(
        input_options=['yes', 'no'],
        input_message=prompt,
    )
    return not (answer == "no")
def create_target_table(self):
    """Create the target table on the target server unless it already exists."""
    if self.is_target_table_exist():
        PrintHelper.print_info_message("目标表已存在")
        return

    PrintHelper.print_info_message("准备创建目标表")
    create_sql = self.get_target_table_create_script()
    self.target_mysql_server.mysql_exec(sql_script=create_sql)
def mysql_query(self, sql_script, sql_paras=None, return_dict=False, max_execute_seconds=0):
    """Run a query and return every fetched row.

    :param sql_script: SQL text to execute.
    :param sql_paras: optional parameters handed to ``cursor.execute``.
    :param return_dict: forwarded to ``self.get_connection``.
    :param max_execute_seconds: forwarded to ``self.set_max_execute_seconds``.
    :return: the value of ``cursor.fetchall()``.
    :raises Exception: carrying the original error's message, after the
        failure has been logged through ``PrintHelper``.
    """
    conn = None
    cursor = None
    try:
        conn = self.get_connection(return_dict=return_dict)
        cursor = conn.cursor()
        self.set_max_execute_seconds(cursor=cursor, max_execute_seconds=max_execute_seconds)
        if sql_paras is not None:
            cursor.execute(sql_script, sql_paras)
        else:
            cursor.execute(sql_script)
        exec_result = cursor.fetchall()
        conn.commit()
        return exec_result
    except Exception as ex:
        # get_connection() may be what failed, in which case there is no
        # connection to roll back; calling rollback() on None would replace
        # the real error with an AttributeError and skip the log below.
        if conn is not None:
            conn.rollback()
        warning_message = "执行出现异常:{}\n堆栈:{}\n语句:{}\n参数:{}".format(
            str(ex), traceback.format_exc(), sql_script, sql_paras
        )
        PrintHelper.print_warning_message(warning_message)
        raise Exception(str(ex))
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
def mysql_exec(self, sql_script, sql_paras=None):
    """Execute the given script and return the affected row count.

    :param sql_script: SQL text to execute.
    :param sql_paras: optional parameters handed to ``cursor.execute``.
    :return: ``cursor.rowcount`` read after the execute call.
    :raises Exception: carrying the original error's message, after the
        failure has been logged through ``PrintHelper``.
    """
    cursor = None
    conn = None
    try:
        conn = self.get_connection()
        cursor = conn.cursor()
        if sql_paras is not None:
            cursor.execute(sql_script, sql_paras)
        else:
            cursor.execute(sql_script)
        affect_rows = cursor.rowcount
        conn.commit()
        return affect_rows
    except Exception as ex:
        # If get_connection() itself raised, conn is still None; rolling
        # back unconditionally would hide the real error behind an
        # AttributeError and skip the log below.
        if conn is not None:
            conn.rollback()
        warning_message = "执行出现异常:{}\n堆栈:{}\n语句:{}\n参数:{}".format(
            str(ex), traceback.format_exc(), sql_script, sql_paras
        )
        PrintHelper.print_warning_message(warning_message)
        raise Exception(str(ex))
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
def delete_data_by_scripts(self, transfer_scripts):
    """Record the delete script and, outside dry-run mode, execute it.

    The script taken from ``transfer_scripts["delete_script"]`` is wrapped
    with a ``USE`` header and a ``COMMIT`` / ``SLEEP`` footer and passed to
    ``PrintHelper.write_file``. In a real run it is then executed through
    ``self.source_mysql_server.mysql_exec_many``; in dry-run mode it is
    only printed.

    :param transfer_scripts: mapping that contains a "delete_script" entry.
    :return: True on success, False when any exception was raised.
    """
    try:
        delete_sql = transfer_scripts["delete_script"]
        # mysql_exec_many expects (script, params) pairs.
        statements = [(delete_sql, None)]

        header = """ USE {0}; """.format(self.source_database_name)
        footer = """ COMMIT; SELECT SLEEP('{0}'); ##=====================================================## """.format(self.batch_sleep_seconds)
        PrintHelper.write_file(
            file_path=self.transfer_script_file,
            message=header + delete_sql + footer,
        )

        if self.is_dry_run:
            PrintHelper.print_info_message("生成迁移脚本(未执行)")
            PrintHelper.print_info_message(delete_sql)
        else:
            affected = self.source_mysql_server.mysql_exec_many(statements)
            PrintHelper.print_info_message("本次归档操作记录{}条".format(affected))
            self.sleep_with_affect_rows(affected)
        return True
    except Exception as ex:
        failure_text = "在归档过程中出现异常:{}\n堆栈:{}\n".format(
            str(ex), traceback.format_exc())
        PrintHelper.print_warning_message(failure_text)
        return False
def create_target_table(self):
    """Create the target table as a structural copy of the source table.

    Builds a ``CREATE TABLE IF NOT EXISTS ... LIKE ...`` statement, prints
    it, and runs it through ``self.source_mysql_server``.
    """
    table_names = dict(
        target_database_name=self.target_database_name,
        source_database_name=self.source_database_name,
        target_table_name=self.target_table_name,
        source_table_name=self.source_table_name,
    )
    create_sql = "CREATE TABLE IF NOT EXISTS `{target_database_name}`.`{target_table_name}` LIKE `{source_database_name}`.`{source_table_name}`;".format(**table_names)
    PrintHelper.print_info_message("建表脚本为:\n {0}".format(create_sql))
    self.source_mysql_server.mysql_exec(sql_script=create_sql)
def get_user_choose_option(self, input_options, input_message):
    """Keep prompting until the user types one of the allowed options.

    :param input_options: accepted answers, compared against the stripped input.
    :param input_message: prompt printed before each read.
    :return: the element of ``input_options`` that matched.
    """
    while True:
        PrintHelper.print_info_message(input_message)
        typed = input("").strip()
        for candidate in input_options:
            if typed == candidate:
                return candidate
def mysql_exec_many(self, script_list):
    """Execute several SQL statements inside a single transaction.

    :param script_list: iterable of ``(sql_script, sql_param)`` pairs;
        ``sql_param`` may be None when the statement takes no parameters.
    :return: sum of ``cursor.rowcount`` over all executed statements
        (statements whose rowcount is None are not counted).
    :raises Exception: carrying the original error's message, after the
        failure has been logged through ``PrintHelper``.
    """
    cursor = None
    conn = None
    total_affect_rows = 0
    try:
        conn = self.get_connection()
        cursor = conn.cursor()
        for sql_script, sql_param in script_list:
            if sql_param is not None:
                cursor.execute(sql_script, sql_param)
            else:
                cursor.execute(sql_script)
            affect_rows = cursor.rowcount
            if affect_rows is not None:
                total_affect_rows += affect_rows
        # One commit for the whole batch keeps the statements atomic.
        conn.commit()
        return total_affect_rows
    except Exception as ex:
        # If get_connection() itself raised, conn is still None; rolling
        # back unconditionally would hide the real error behind an
        # AttributeError and skip the log below.
        if conn is not None:
            conn.rollback()
        warning_message = "执行出现异常:{}\n堆栈:{}\n参数:{}".format(
            str(ex), traceback.format_exc(), script_list
        )
        PrintHelper.print_warning_message(warning_message)
        raise Exception(str(ex))
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
def archive_data(self):
    """Validate the configuration, confirm with the user, then archive.

    Stops silently when ``check_config`` or ``user_confirm`` returns a
    falsy value; ``user_confirm`` is only consulted when the configuration
    check passed.
    """
    PrintHelper.print_info_message("开始检查归档配置")
    if self.check_config() and self.user_confirm():
        PrintHelper.print_info_message("开始归档数据")
        self.loop_archive_data()
        PrintHelper.print_info_message("结束归档数据")
def main():
    """Train three ARTM models and collect their topics into one experiment pool.

    Loads paths from 'config.cfg', builds a batch vectorizer and dictionary,
    then runs ``process_one_model`` three times (models 'model_0' ..
    'model_2'). After each run the model's phi/theta matrices are handed to
    ``Experiment.collect_topics`` and the topic filter's histogram is saved
    to 'data_iter_<i>.csv'.

    Written for Python 2 (``print`` statements, ``xrange``).
    """
    print artm.version()
    config = ConfigPaths('config.cfg')
    plot_maker = PlotMaker()
    printer = PrintHelper()
    print config.models_file_name
    batch_vectorizer = artm.BatchVectorizer(
        data_path=config.output_batches_path, data_format='batches')
    dictionary = artm.Dictionary()
    dictionary.load(dictionary_path=config.dictionary_path + '.dict')
    # NOTE(review): opened in append mode and never closed in this function
    # (the close call at the bottom is commented out) - confirm whether
    # process_one_model is expected to close it.
    models_file = open(config.models_file_name, 'a')
    # model = process_one_model(config, batch_vectorizer, models_file, printer, plot_maker,
    #                           dictionary, _n_topics=50, _n_doc_passes=5, _seed_value=100, _n_top_tokens=10, _p_mass_threshold=0.25,
    #                           _n_iterations=20, _model_name='model1')
    exp = Experiment(
        Pool(topics_filter=OptimizationTopicsFilter(eps=10**(-2.5), verbose=False),
             save_topics=True))
    for i in xrange(3):
        # Every run uses the same hyper-parameters and the same seed value;
        # only the model name changes between iterations.
        model_artm = process_one_model(config, batch_vectorizer, models_file, printer, plot_maker,
                                       dictionary, _n_topics=50, _n_doc_passes=5, _seed_value=100,
                                       _n_top_tokens=10, _p_mass_threshold=0.25,
                                       _n_iterations=20, _model_name='model_{}'.format(i))
        #display_points(model_artm.get_phi())
        exp.collect_topics(model_artm.get_phi(), model_artm.get_theta())
        vals, bins = exp.topics_pool.topics_filter.plot_hist()
        save_hist(vals, bins, "data_iter_{}.csv".format(i))
    # NOTE(review): the original indentation was lost; this print is assumed
    # to run once after the loop - confirm it was not meant to run per
    # iteration.
    print exp.topics_pool.get_basic_topics_count()
    # models_file.close()
def check_config(self):
    """Validate the archive settings and the source/target table layouts.

    Checks that database name, table name and data condition are non-empty,
    creates the target table via ``self.create_target_table()``, then
    compares source and target columns pairwise (name, key and type,
    case-insensitively). The source table must have exactly one primary key
    column; its name is stored in ``self.table_primary_key`` and its kind
    ('INT' or 'CHAR') in ``self.table_primary_key_type``.

    :return: True when every check passes, False otherwise (including when
        an exception is raised, which is logged).
    """
    try:
        if str(self.source_database_name).strip() == "":
            PrintHelper.print_warning_message("数据库名不能为空")
            return False
        if str(self.source_table_name).strip() == "":
            PrintHelper.print_warning_message("表名不能为空")
            return False
        if str(self.data_condition).strip() == "":
            PrintHelper.print_warning_message("迁移条件不能为空")
            return False

        self.create_target_table()
        source_columns = self.get_source_columns()
        target_columns = self.get_target_columns()
        if len(source_columns) != len(target_columns):
            PrintHelper.print_warning_message("源表和目标表的列数量不匹配,不满足迁移条件")
            return False

        primary_key_count = 0
        for source_column, target_column in zip(source_columns, target_columns):
            source_column_name = source_column["Field"]
            source_column_key = str(source_column["Key"]).lower()
            source_column_type = str(source_column["Type"]).lower()
            target_column_name = target_column["Field"]
            target_column_key = str(target_column["Key"]).lower()
            # Read the type from the TARGET column; the earlier code read it
            # from the source again, so the type check could never fail.
            target_column_type = str(target_column["Type"]).lower()

            # Compare source against target for all three attributes; the
            # earlier code compared the target key with itself.
            if source_column_name.lower() != target_column_name.lower() \
                    or source_column_key != target_column_key \
                    or source_column_type != target_column_type:
                PrintHelper.print_warning_message("源表和目标表的列不匹配,不满足迁移条件")
                return False

            if source_column_key == 'pri':
                primary_key_count += 1
                self.table_primary_key = source_column_name
                if 'int(' in source_column_type or 'bigint(' in source_column_type:
                    self.table_primary_key_type = 'INT'
                elif 'varchar(' in source_column_type:
                    self.table_primary_key_type = 'CHAR'

        # NOTE(review): the original indentation was lost; this check is
        # placed after the loop. The return value is the same either way,
        # only which warning is printed can differ.
        if self.table_primary_key_type == "":
            PrintHelper.print_warning_message(
                "主键不为int/bigint/varchar三种类型中的一种,不满足迁移条件")
            return False
        if primary_key_count == 0:
            PrintHelper.print_warning_message("未找到主键,不瞒足迁移条件")
            return False
        if primary_key_count > 1:
            PrintHelper.print_warning_message("要迁移的表使用复合主键,不满足迁移条件")
            return False
        return True
    except Exception as ex:
        PrintHelper.print_warning_message("执行出现异常,异常为{0},{1}".format(
            str(ex), traceback.format_exc()))
        return False
def loop_archive_data(self):
    """Archive the key range batch by batch until done, stopped, or failed.

    Obtains the overall key range from ``self.get_loop_key_range()`` and
    walks it from the minimum key towards the maximum key. Each iteration
    asks ``self.get_next_loop_key`` for the end of the next batch and hands
    the pair to ``self.archive_range_data``. The loop ends early when a stop
    file is detected, when no next key is returned, or when archiving a
    batch reports failure.
    """
    max_key, min_key = self.get_loop_key_range()
    if max_key is None or min_key is None:
        PrintHelper.print_info_message("未找到满足条件的键区间")
        return
    current_key = min_key
    while current_key < max_key:
        # Checked at the top of every batch so a run can be interrupted
        # between batches.
        if self.has_stop_file():
            break
        PrintHelper.print_info_message("*" * 70)
        next_key = self.get_next_loop_key(current_key=current_key)
        if next_key is None:
            PrintHelper.print_info_message("未找到下一个可归档的区间,退出归档")
            break
        if not self.archive_range_data(current_key=current_key, next_key=next_key):
            PrintHelper.print_info_message("执行出现异常,退出归档!")
            break
        # Advance only after the batch was archived successfully.
        current_key = next_key
        PrintHelper.print_info_message("*" * 70)
        info = """最小值为:{0},最大值为:{1},当前处理值为:{2}""".format(
            min_key, max_key, current_key)
        PrintHelper.print_info_message(info)
    # NOTE(review): the original indentation was lost; the progress report
    # above is assumed to belong to the loop body and the two prints below
    # to run once after it - confirm against the original file.
    PrintHelper.print_info_message("*" * 70)
    PrintHelper.print_info_message("执行完成")
def archive_range_data(self, current_key, next_key):
    """Move one key range from the source table to the target table.

    In dry-run mode nothing is executed. Otherwise the keys in the range are
    looked up, the matching rows are read, any rows with those keys are
    removed from the target, the rows are inserted into the target, and
    finally the rows are deleted from the source.

    :param current_key: start of the range, passed to ``get_archive_range_keys``.
    :param next_key: end of the range, passed to ``get_archive_range_keys``.
    :return: True on success (or dry run), False when an exception was raised.
    """
    try:
        if self.is_dry_run:
            PrintHelper.print_info_message("未真实执行,未生产SQL文件。")
            return True

        range_keys = self.get_archive_range_keys(current_key, next_key)
        range_data = self.get_data_by_keys(range_keys=range_keys)
        PrintHelper.print_info_message("找到满足条件记录{}条".format(
            len(range_data)))

        # Clear the target first so the insert below cannot collide with
        # rows left behind by an earlier, interrupted run.
        PrintHelper.print_info_message("开始删除目标库已有数据")
        self.delete_target_data_by_keys(range_keys=range_keys)

        PrintHelper.print_info_message("开始向目标库上插入数据")
        self.insert_target_range_data(range_data=range_data)

        # This step deletes from the SOURCE; the message previously said
        # "target" (copied from the step above), which made logs misleading.
        PrintHelper.print_info_message("开始删除源库归档数据")
        self.delete_source_data_by_keys(range_keys=range_keys)
        return True
    except Exception as ex:
        PrintHelper.print_warning_message("在归档过程中出现异常:{}\n堆栈:{}\n".format(
            str(ex), traceback.format_exc()))
        return False
def has_stop_file(self):
    """Report whether the file at ``self.stop_script_file`` exists.

    :return: True (after printing a notice) when the path exists, else False.
    """
    if not os.path.exists(self.stop_script_file):
        return False
    PrintHelper.print_info_message("检查到停止文件,准备退出!")
    return True
def check_config(self):
    """Validate the archive settings and locate the source table's primary key.

    Requires non-empty database name, table name and data condition, and a
    source table with exactly one primary key column. The key's name is
    stored in ``self.table_primary_key`` and its kind ('INT' or 'CHAR') in
    ``self.table_primary_key_type``.

    :return: True when every check passes, False otherwise (including when
        an exception is raised, which is logged).
    """
    required_settings = (
        ("source_database_name", "数据库名不能为空"),
        ("source_table_name", "表名不能为空"),
        ("data_condition", "迁移条件不能为空"),
    )
    try:
        for setting_name, empty_warning in required_settings:
            if not str(getattr(self, setting_name)).strip():
                PrintHelper.print_warning_message(empty_warning)
                return False

        pk_total = 0
        for column in self.get_source_columns():
            column_name = column["Field"]
            column_key = column["Key"]
            column_type = column["Type"]
            if str(column_key).lower() != 'pri':
                continue
            pk_total += 1
            self.table_primary_key = column_name
            type_text = str(column_type).lower()
            if 'int(' in type_text or 'bigint(' in type_text:
                self.table_primary_key_type = 'INT'
            elif 'varchar(' in type_text:
                self.table_primary_key_type = 'CHAR'

        # NOTE(review): the original indentation was lost; this check is
        # placed after the loop. The return value is the same either way,
        # only which warning is printed can differ.
        if self.table_primary_key_type == "":
            PrintHelper.print_warning_message(
                "主键不为int/bigint/varchar三种类型中的一种,不满足迁移条件")
            return False
        if pk_total == 0:
            PrintHelper.print_warning_message("未找到主键,不瞒足迁移条件")
            return False
        if pk_total > 1:
            PrintHelper.print_warning_message("要迁移的表使用复合主键,不满足迁移条件")
            return False
        return True
    except Exception as ex:
        failure_text = "执行出现异常,异常:{0}\n堆栈:{1}\n".format(
            str(ex), traceback.format_exc())
        PrintHelper.print_warning_message(failure_text)
        return False