def get_db():
    """Return a Mongo connection for the current application context.

    The underlying ``DbConnect`` is created lazily on first use and
    cached on Flask's ``g`` object, so repeated calls within one app
    context share a single connection.
    """
    if not hasattr(g, 'mongodb'):
        connector = DbConnect()
        connector.connect_db()
        g.mongodb = connector
    return g.mongodb.get_connection()
def __init__(self):
    """Load settings from the config.json that sits next to this file.

    Sets up the Dropbox client (DBX), the database helper (DB) and the
    base output path (BASE_PATH).
    """
    file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "config.json")
    # The with-statement closes the file; the original also called
    # f.close() redundantly inside the block.
    with open(file_path) as f:
        config = json.load(f)
    access_token = config["DROPBOX_ACCESS_TOKEN"]
    self.DBX = dropbox.Dropbox(access_token)
    self.DB = DbConnect()
    self.BASE_PATH = config["CAMA_BASE_PATH"]
def __init__(self, query_num):
    """Prepare a LIMIT-ed query against the 51job ``job`` table.

    query_num -- maximum number of rows the query may return; coerced
    through int() so a non-numeric value raises instead of being
    concatenated into the SQL text.
    """
    self.file_name = ""
    self.file_mode = ""
    self.file_encoding = ""
    # NOTE(review): credentials are hard-coded; move them to config.
    self.db_util = DbConnect(host='localhost', user='******', passwd='1234',
                             db='51job', port=3306, charset='utf8')
    self.query_num = query_num
    # int() guards the concatenation: only a plain integer can reach
    # the SQL string (the original interpolated str(query_num) as-is).
    self.query_sql = "SELECT info FROM job LIMIT " + str(int(query_num)) + ";"
    self.query_data = []
class FileIOAndSQL:
    """Fetch rows from the 51job MySQL table and dump them to a text file."""

    def __init__(self, query_num):
        """Prepare a LIMIT-ed query; query_num caps the number of rows."""
        self.file_name = ""
        self.file_mode = ""
        self.file_encoding = ""
        # NOTE(review): credentials are hard-coded; move them to config.
        self.db_util = DbConnect(host='localhost', user='******',
                                 passwd='1234', db='51job', port=3306,
                                 charset='utf8')
        self.query_num = query_num
        # int() guards the concatenation against non-numeric input.
        self.query_sql = ("SELECT info FROM job LIMIT "
                          + str(int(query_num)) + ";")
        self.query_data = []

    def get_data(self):
        """Run the prepared query, cache the rows, and return them."""
        self.query_data = self.db_util.query(self.query_sql)
        return self.query_data

    def data2file(self, file_name, file_mode, file_encoding, data=None):
        """Write rows to *file_name*, one ``str(row)`` per line.

        If *data* is non-empty it is written; otherwise the cached
        ``query_data`` is written (fetching it first when empty).

        Raises ModeErrorException unless *file_mode* is a write/append
        mode.

        Fixes vs. the original: the mutable default ``data=[]`` is now
        None; the bare ``except: raise IOError`` that masked the real
        write error is gone; and rows are no longer written twice
        (the old code appended ``query_data`` even when *data* was
        given, contradicting its own docstring).
        """
        allow_mode = ['w', 'w+', 'a', 'a+']
        if file_mode not in allow_mode:
            raise ModeErrorException()
        self.file_name = file_name
        self.file_mode = file_mode
        self.file_encoding = file_encoding
        with open(file=self.file_name, mode=self.file_mode,
                  encoding=self.file_encoding) as f:
            if data:
                rows = data
            else:
                if not self.query_data:
                    self.get_data()
                rows = self.query_data
            for row in rows:
                f.write(str(row) + '\n')

    # TODO: write the word-segmentation results to CSV.
    def data2csv(self):
        pass
def load_db_connects():
    """Open a DbConnect for every db*.ini file in the working directory.

    Successful connections are appended to the global ``g_list_db``;
    each failure is reported through ``text_logger`` instead.
    """
    banner = '==============================================='
    for ini_file in glob.glob("db*.ini"):
        connection = DbConnect(ini_file, g_logger)
        if connection.is_connect:
            g_list_db.append(connection)
            continue
        text_logger.add_line(banner)
        text_logger.add_line('Database: ' + connection.desc)
        text_logger.add_line('ERROR TO CONNECT !')
        text_logger.add_line(connection.error_desc)
        text_logger.add_line(banner)
lower_limit_guess = 0 result = func_base(upper_limit_guess) while result > 0 and upper_limit_guess < 100: lower_limit_guess = upper_limit_guess upper_limit_guess *= 2 last_result = result result = func_base(upper_limit_guess) return (lower_limit_guess, last_result, min(upper_limit_guess, 100), result) if __name__ == '__main__': config_reader.initialize() source_dbc = DbConnect(config_reader.get_source_db_connection_info()) destination_dbc = DbConnect( config_reader.get_destination_db_connection_info()) temp_schema = 'subset_' + str(uuid.uuid4()).replace('-', '') # Get list of tables to operate on all_tables = list_all_tables(source_dbc.get_db_connection()) all_tables = [ x for x in all_tables if x not in config_reader.get_excluded_tables() ] lower_limit, lower_limit_norm, upper_limit, upper_limit_norm = compute_fast_limits( ) max_tries = config_reader.get_max_tries() try:
class DropBox:
    """Wrapper around the Dropbox SDK plus the Mongo collection that
    tracks the upload state of output folders.
    """

    def __init__(self):
        """Load settings from the config.json next to this file."""
        file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "config.json")
        with open(file_path) as f:
            config = json.load(f)
        access_token = config["DROPBOX_ACCESS_TOKEN"]
        self.DBX = dropbox.Dropbox(access_token)
        self.DB = DbConnect()
        self.BASE_PATH = config["CAMA_BASE_PATH"]

    def create_folder(self, folder_name):
        """Create /<folder_name> in Dropbox; SDK errors propagate."""
        self.DBX.files_create_folder_v2("/" + folder_name, autorename=False)

    def upload_output(self):
        """Upload every *.bin file for the folder marked 'running'.

        Finds the folder document whose status is 'running', pushes
        <BASE_PATH>/out/hamid/*.bin into the matching Dropbox folder,
        then flips the status to 'completed' — or to 'error' if
        anything fails along the way.
        """
        folder_collection = None
        folder = None
        try:
            self.DB.connect_db()
            mongo_client = self.DB.get_connection()
            folder_collection = mongo_client["output"]["folder"]
            output_path = os.path.join(self.BASE_PATH, "out", "hamid")
            folder = folder_collection.find_one({"status": "running"})
            if folder is None:
                raise Exception("No Record in execution in Database")
            if not self.folder_exists(folder["folder_name"]):
                raise Exception("Folder doesn't exist in dropbox")
            folder_name = "/" + folder["folder_name"]
            for filename in glob.glob(os.path.join(output_path, '*.bin')):
                with open(filename, 'rb') as fp:
                    self.DBX.files_upload(
                        fp.read(),
                        folder_name + "/" + filename.split("/")[-1],
                        mode=WriteMode("overwrite"))
            # NOTE(review): collection.update() is deprecated in modern
            # pymongo (use update_one); kept for compatibility here.
            folder_collection.update({"_id": folder["_id"]},
                                     {"$set": {"status": "completed"}})
        except Exception:
            if folder_collection is not None and folder is not None:
                folder_collection.update({"_id": folder["_id"]},
                                         {"$set": {"status": "error"}})
            raise
        finally:
            self.DB.disconnect_db()

    def folder_exists(self, folder_name):
        """Return True iff /<folder_name> exists and is a folder.

        Best-effort: any SDK/network error counts as 'not found'.
        """
        try:
            metadata = self.DBX.files_get_metadata("/" + folder_name)
        except Exception:
            return False
        return isinstance(metadata, dropbox.files.FolderMetadata)

    def download_file(self, folder_name, file_name, download_folder_name):
        """Download /<folder_name>/<file_name> from Dropbox into
        ./<download_folder_name>/<folder_name>/, creating directories
        as needed.
        """
        file_path = "/" + folder_name + "/" + file_name
        target_dir = os.path.join(os.getcwd(), download_folder_name,
                                  folder_name)
        # exist_ok avoids the check-then-create race of the original
        # two os.path.exists probes.
        os.makedirs(target_dir, exist_ok=True)
        self.DBX.files_download_to_file(
            os.path.join(target_dir, file_name), file_path)
        print("downloaded ", file_name)

    def recover(self):
        """Mark a stale 'running' folder as errored and delete its
        Dropbox folder, if present.
        """
        try:
            self.DB.connect_db()
            mongo_client = self.DB.get_connection()
            folder_collection = mongo_client["output"]["folder"]
            folder = folder_collection.find_one({"status": "running"})
            if folder is not None:
                # BUG FIX: the original update document lacked "$set",
                # which makes Mongo REPLACE the whole record with
                # {"status": "error"}; use $set like upload_output().
                folder_collection.update({"_id": folder["_id"]},
                                         {"$set": {"status": "error"}})
                if self.folder_exists(folder["folder_name"]):
                    self.delete_folder(folder["folder_name"])
        finally:
            self.DB.disconnect_db()

    def delete_folder(self, folder_name):
        """Permanently delete /<folder_name> from Dropbox."""
        self.DBX.files_delete_v2("/" + folder_name)
last_result = desired_result lower_limit_guess = 0 result = func_base(upper_limit_guess) while result > 0: lower_limit_guess = upper_limit_guess upper_limit_guess *= 2 last_result = result result = func_base(upper_limit_guess) return (lower_limit_guess, last_result, upper_limit_guess, result) if __name__ == '__main__': config_reader.initialize() source_dbc = DbConnect(config_reader.get_source_db_connection_info()) destination_dbc = DbConnect( config_reader.get_destination_db_connection_info()) temp_schema = 'subset_' + str(uuid.uuid4()).replace('-', '') lower_limit, lower_limit_norm, upper_limit, upper_limit_norm = compute_fast_limits( ) max_tries = config_reader.get_max_tries() try: bisect(func, lower_limit, upper_limit, maxiter=max_tries, args=(lower_limit, lower_limit_norm, upper_limit, upper_limit_norm))
if db_type == 'postgres': return PsqlDatabaseCreator(source, dest, False) elif db_type == 'mysql': return MySqlDatabaseCreator(source, dest) else: raise ValueError('unknown db_type ' + db_type) if __name__ == '__main__': if "--stdin" in sys.argv: config_reader.initialize(sys.stdin) else: config_reader.initialize() db_type = config_reader.get_db_type() source_dbc = DbConnect(db_type, config_reader.get_source_db_connection_info()) destination_dbc = DbConnect( db_type, config_reader.get_destination_db_connection_info()) database = db_creator(db_type, source_dbc, destination_dbc) database.teardown() database.create() # Get list of tables to operate on db_helper = database_helper.get_specific_helper() all_tables = db_helper.list_all_tables(source_dbc) all_tables = [ x for x in all_tables if x not in config_reader.get_excluded_tables() ] subsetter = Subset(source_dbc, destination_dbc, all_tables)
from subset import Subset from database_creator import DatabaseCreator from db_connect import DbConnect from config_reader import ConfigReader from subset_result_norm import SubsetResultNorm from scipy.optimize import bisect from result_tabulator import SubsetResultFunc source_dbc = DbConnect('.source_db_connection_info') destination_dbc = DbConnect('.destination_db_connection_info') temp_schema = 'subset' schema = 'public' def func(percent, lower_limit, lower_limit_norm, upper_limit, upper_limit_norm): if percent == lower_limit: return lower_limit_norm if percent == upper_limit: return upper_limit_norm return func_base(percent) def func_base(percent): database = DatabaseCreator(source_dbc, destination_dbc, temp_schema, False) database.teardown()