def drop_col(table, *col_drop, db_name=DB_NAME, db_path=DB_DIR_PARENT):
    """
    Drop one or more columns from a sqlite table.

    sqlite does not support `ALTER TABLE ... DROP COLUMN`, so this
    recreates the table: copy the remaining columns into a backup table,
    drop the original table, then rename the backup back.

    ARGUMENTS:
        table       table name (with or without enclosing brackets)
        *col_drop   names of the columns to drop
        db_name     database file name
        db_path     directory containing the database file
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    perm = os.stat(db_fullpath).st_mode
    filef.set_f_perm(db_fullpath, '0666')
    conn = sqlite3.connect(db_fullpath)
    c = conn.cursor()
    table = surround_by_brackets(table)
    col_dict = get_attr_info(table, c=c)
    drop_set = set(surround_by_brackets(col_drop))
    # keep the original column order: iterating a set difference (as the
    # old code did) scrambles the recreated table's column order, and the
    # old reduce() without an initializer silently dropped the type
    # declaration of the first remaining column
    remain_keys = [k for k in col_dict.keys() if k not in drop_set]
    remain_keys_str = ', '.join(remain_keys)
    remain_attr_str = ', '.join(
        '{} {}'.format(k, col_dict[k]) for k in remain_keys)
    cmd = \
        ('CREATE TABLE _bk ({remain_attr_str});\n'
         'INSERT INTO _bk SELECT {remain_keys_str} from {table};\n'
         'DROP TABLE {table};\n'
         'ALTER TABLE _bk RENAME TO {table};')\
        .format(table=table,
                remain_attr_str=remain_attr_str,
                remain_keys_str=remain_keys_str)
    for cmd_ in cmd.split('\n'):
        printf(cmd_)
        c.execute(cmd_)
    conn.commit()
    conn.close()
    filef.set_f_perm(db_fullpath, perm)
    printf('successfully drop column(s): {}', col_drop)
def sanity_last_n_commit(*table, num_run=1, db_name=DB_NAME,
                         db_path=DB_DIR_PARENT, time_attr=TIME_ATTR):
    """
    delete the entries with the latest populate_time, for all tables
    with the time attr

    ARGUMENTS:
        table       if table=(), then delete entries for all tables,
                    otherwise only delete for that in *table
        num_run     delete entries with the last (num_run) populate time
        time_attr   the name of the time attribute
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    conn = sqlite3.connect(db_fullpath)
    c = conn.cursor()
    if len(table) == 0:
        table = list(c.execute(
            'SELECT name FROM sqlite_master WHERE type=\'table\''))
        table = ['[{}]'.format(t[0]) for t in table]
    else:
        table = ['[{}]'.format(t) for t in table]
    # filter table list to those actually containing the time_attr
    table_flt = []
    for tbl in table:
        tbl_attr = list(get_attr_info(
            tbl, enclosing=False, db_fullpath=db_fullpath).keys())
        if time_attr in tbl_attr:
            table_flt += [tbl]
    time_attr = surround_by_brackets(time_attr)
    time_set = set()
    for tbl in table_flt:
        cur = c.execute('SELECT DISTINCT {} FROM {}'.format(time_attr, tbl))
        time_set |= {row[0] for row in cur.fetchall()}
    conn.close()
    time_len = len(time_set)
    # clamp: cannot clear more commits than exist (the old
    # `(num_run > time_len) and time_len or num_run` broke when
    # time_len == 0)
    num_run = min(num_run, time_len)
    time_list = sorted(time_set)[time_len - num_run:]
    for tbl in table_flt:
        for t in time_list:
            # strip the enclosing '[' / ']' back off for sanity_db
            sanity_db(time_attr[1:-1], t, tbl[1:-1],
                      db_name=db_name, db_path=db_path)
    printf('Done: cleared last {} commits for {}', num_run, table_flt)
    bad_table = set(table) - set(table_flt)
    if bad_table:
        printf('tables {} don\'t have attr {}', bad_table, time_attr,
               type='WARN')
def load_as_array(db_fullpath, table, attr_list, size=-1, c=None):
    """
    load data from db as numpy array

    attr name in attr_list may or may not be surrounded by '[' and ']'

    ARGUMENTS:
        db_fullpath full path of the db file
        table       table to load from
        attr_list   columns to select
        size        number of rows to load; -1 means "all rows"
        c           optional existing cursor; a fresh connection is
                    opened (and closed) when not supplied
    """
    conn = None
    if c is None:  # `if not c` would misclassify a falsy cursor object
        conn = sqlite3.connect(db_fullpath)
        c = conn.cursor()
    attr_list = surround_by_brackets(attr_list)
    table = surround_by_brackets(table)
    tot_num = count_entry(db_fullpath, table, c=c)
    # NOTE: the old `(size == -1) and tot_num or size` returned -1 when
    # the table was empty (tot_num == 0 is falsy)
    if size == -1:
        size = tot_num
    if tot_num < size:
        printf('db don\'t have enough entries to load!', type='ERROR')
    c.execute('SELECT {} FROM {} LIMIT {}'.format(
        ','.join(attr_list), table, size))
    ret = np.array(list(c.fetchall()))
    if conn:
        conn.close()
    return ret
def export_to_text(table, out_file, order_by, *col,
                   db_name=DB_NAME, db_path=DB_DIR_PARENT):
    """
    export data entries from a sqlite3 db into a text file

    ARGUMENTS:
        table       table to export from
        out_file    output text file
        order_by    output entries order by the value in this column
        *col        the columns in table to be exported
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    conn = sqlite3.connect(db_fullpath)
    c = conn.cursor()
    table = surround_by_brackets(table)
    order_by = surround_by_brackets(order_by)
    col = [surround_by_brackets(_) for _ in col]
    col_str = ','.join(col)
    retrieved = list(c.execute(
        'SELECT DISTINCT {cols} FROM {table} ORDER BY {ob}'.format(
            cols=col_str, table=table, ob=order_by)))
    conn.close()
    # one line per row, values separated by ', ' — join() instead of the
    # old nested reduce() + strip(), which could also eat leading /
    # trailing whitespace belonging to the data itself
    s = '\n'.join(
        ', '.join('{}'.format(v) for v in row) for row in retrieved)
    ff.print_to_file(out_file, s, type=None, log_dir='./')
    printf('retrieved data into {}', out_file)
def normalize_col(table, col, group_by_key, db_name=DB_NAME, db_path=DB_DIR_PARENT):
    """
    Add a '<col>_norm' REAL column: <col> divided by the max of <col>
    within each group sharing the same group_by_key value.

    ARGUMENTS:
        table           table to alter
        col             numeric column to normalize
        group_by_key    column whose value partitions rows into groups
        db_name         database file name
        db_path         directory containing the database file
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    perm = os.stat(db_fullpath).st_mode
    filef.set_f_perm(db_fullpath, '0666')
    conn = sqlite3.connect(db_fullpath)
    c = conn.cursor()
    table = surround_by_brackets(table)
    group_by_key = surround_by_brackets(group_by_key)
    col_norm = surround_by_brackets('{}_norm'.format(col))
    col = surround_by_brackets(col)
    c.execute('ALTER TABLE {table} ADD COLUMN {col_norm} REAL'.format(
        table=table, col_norm=col_norm))
    k_list = [i[0] for i in c.execute(
        'SELECT DISTINCT {filt} FROM {table}'.format(
            filt=group_by_key, table=table)).fetchall()]
    for k in k_list:
        # bind values with `?` placeholders: the old '{filt}={k}'
        # interpolation produced invalid SQL for TEXT group keys
        m = c.execute(
            'SELECT max({col}) FROM {table} WHERE {filt}=?'.format(
                col=col, table=table, filt=group_by_key), (k,)).fetchone()
        m = float(m[0])
        d_list = [i[0] for i in c.execute(
            'SELECT {col} FROM {table} WHERE {filt}=?'.format(
                col=col, table=table, filt=group_by_key), (k,)).fetchall()]
        for d in d_list:
            c.execute(
                'UPDATE {table} SET {col_norm} = ? '
                'WHERE {filt}=? AND {col}=?'.format(
                    table=table, col_norm=col_norm,
                    filt=group_by_key, col=col),
                (d / m, k, d))
    conn.commit()
    conn.close()
    filef.set_f_perm(db_fullpath, perm)
    printf('successfully normalize column: {}', col)
def add_col(table, col_add_name, col_add_type, f_lambda, *dependencies,
            db_name=DB_NAME, db_path=DB_DIR_PARENT):
    """
    Add a column computed row-by-row from existing columns.

    ARGUMENTS:
        table           table to alter
        col_add_name    name of the new column
        col_add_type    sqlite type of the new column (INTEGER, TEXT, ...)
        f_lambda        callable receiving a row's dependency values and
                        returning the new column's value for that row
        *dependencies   columns in the original table passed to f_lambda
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    perm = os.stat(db_fullpath).st_mode
    filef.set_f_perm(db_fullpath, '0666')
    conn = sqlite3.connect(db_fullpath)
    c = conn.cursor()
    table = surround_by_brackets(table)
    col_add_name = surround_by_brackets(col_add_name)
    dependencies = surround_by_brackets(dependencies)
    dp_str = ', '.join(dependencies)
    col_dict = get_attr_info(table, c=c)
    assert set(dependencies).issubset(set(col_dict.keys()))
    c.execute('ALTER TABLE {table} ADD COLUMN {col} {type}'.format(
        table=table, col=col_add_name, type=col_add_type))
    dp_list = list(c.execute(
        'SELECT {dp} FROM {table}'.format(dp=dp_str, table=table)))
    # the WHERE template is loop-invariant: build it once; `?` binding
    # also fixes the old unquoted '{}={}' interpolation for TEXT columns
    up_cond_str = ' and '.join('{}=?'.format(n) for n in dependencies)
    for dp in dp_list:
        c.execute(
            'UPDATE {table} SET {col} = ? WHERE {cond}'.format(
                table=table, col=col_add_name, cond=up_cond_str),
            (f_lambda(*dp),) + tuple(dp))
    conn.commit()
    conn.close()
    filef.set_f_perm(db_fullpath, perm)
    printf('successfully add column: {}', col_add_name)
def populate_db(attr_name, attr_type, *d_tuple,
                db_path=DB_DIR_PARENT, db_name=DB_NAME, table_name=DB_TABLE,
                append_time=True, usr_time=None, perm='default',
                silent=False):
    """
    populate data into database, with user defined schema
    optionally append the time to each data tuple.
    Policy on existing data:
        This function will never drop existing data.
        It will only append to table if it already exists.
        So this is a safe operation.
        To delete entries, call sanity_db.
    ARGUMENTS:
        attr_name       list of attribute name in database
        attr_type       type of attr: e.g.: INTEGER, TEXT, REAL...
        d_tuple         arbitrary num of arguments that consist of the tuple
                        can be 1D or 2D: if 1D, expand it to 2D
        db_path         path of database
        db_name         name of database
        table_name      table name
        append_time     append timestamp to each data tuple if set true
        usr_time        user supplied time string; current time when None
        perm            permission of file. Refer to logf.filef for details
        silent          won't log info after successful population if set True
    """
    file_opt = 'a'
    db_fullname = '{}/{}'.format(db_path, db_name)
    filef.mkdir_r(os.path.dirname(db_fullname))
    # file permission policy: keep an existing file's mode, otherwise
    # default a newly created db to read-only
    if perm == 'default':
        perm = os.stat(db_fullname).st_mode \
            if os.path.exists(db_fullname) else '0444'
    if os.path.exists(db_fullname):
        filef.set_f_perm(db_fullname, '0666')
    open(db_fullname, file_opt).close()
    # set-up: determine the common row count across all 2D inputs
    num_tuples = -1
    d_fulltuple = None
    for d in d_tuple:
        d_arr = array(d)
        assert len(d_arr.shape) <= 2
        if len(d_arr.shape) == 2:
            num_tuples = num_tuples if num_tuples > -1 else d_arr.shape[0]
            assert num_tuples == d_arr.shape[0]
    if num_tuples == -1:
        # only one tuple
        num_tuples = 1
    for d in d_tuple:
        d_arr = array(d)
        if len(d_arr.shape) == 0:
            d_arr = array([d])
        if len(d_arr.shape) == 1:
            # broadcast a 1D input to every row
            d_arr = d_arr.reshape(1, d_arr.size)
            d_arr = repeat(d_arr, num_tuples, axis=0)
        if d_fulltuple is None:
            d_fulltuple = d_arr
        else:
            d_fulltuple = concatenate((d_fulltuple, d_arr), axis=1)
    if append_time:
        attr_name = [TIME_ATTR] + list(attr_name)
        attr_type = ['TEXT'] + list(attr_type)
        # fall back to the current time when the caller supplies none
        time_str = usr_time if usr_time else strftime(TIME_FORMAT)
        time_col = array([time_str] * num_tuples) \
            .reshape(num_tuples, 1)
        d_fulltuple = concatenate((time_col, d_fulltuple), axis=1)
    # sqlite3
    conn = sqlite3.connect(db_fullname)
    c = conn.cursor()
    table_name = surround_by_brackets(table_name)
    assert len(attr_name) == len(attr_type)
    create_clause = ', '.join(
        '[{}] {}'.format(n, t) for n, t in zip(attr_name, attr_type))
    c.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
        table_name, create_clause))
    for tpl in d_fulltuple:
        placeholder = ', '.join(['?'] * len(tpl))
        c.execute('INSERT INTO {} VALUES ({})'.format(
            table_name, placeholder), tpl)
    # finish up
    conn.commit()
    conn.close()
    # enforce file permission
    filef.set_f_perm(db_fullname, perm)
    # log
    if not silent:
        printf('success: populate {} entries into table {}',
               num_tuples, table_name, separator=None)
def sanity_db(attr_name, attr_val, table_name,
              db_name=DB_NAME, db_path=DB_DIR_PARENT, silent=False):
    """
    remove entries in db file.
    Can be useful to keep the db clean when you do a lot of unstable
    testing for your project.

    ARGUMENTS:
        attr_name   list: the selection criteria for deleting db entries
        attr_val    list: the selection value for deleting db entries
        table_name  the table in the db file
        db_name     db file name
        db_path     the full path of db file is db_name + db_path
        silent      if silent, don't log info after successful deletion
    """
    # convert arg to list if passing in single int / string
    if len(array(attr_name).shape) == 0:
        attr_name = [attr_name]
    if len(array(attr_val).shape) == 0:
        attr_val = [attr_val]
    db_fullname = '{}/{}'.format(db_path, db_name)
    perm = os.stat(db_fullname).st_mode
    filef.set_f_perm(db_fullname, '0666')
    # don't check file: leave it to user / wrapper function
    conn = sqlite3.connect(db_fullname)
    c = conn.cursor()
    table_name = surround_by_brackets(table_name)
    orig_row = count_entry(db_fullname, table_name)
    attr_len = len(attr_name)
    assert attr_len == len(attr_val)
    # check if the attr passed in is the attr in the db
    # (explicit check instead of try/assert: asserts vanish under -O)
    db_attr_set = set(get_attr_info(
        table_name, db_fullpath=db_fullname, enclosing=False).keys())
    if not set(attr_name).issubset(db_attr_set):
        printf('table {} doesn\'t contain some of the following attr: {}',
               table_name, attr_name, type='ERROR')
        # BUGFIX: the old early return leaked the connection and left
        # the db file world-writable (perm never restored)
        conn.close()
        filef.set_f_perm(db_fullname, perm)
        return
    # `?` binding replaces manual string quoting: handles values that
    # themselves contain quotes
    del_cond = ' and '.join('[{}] = ?'.format(n) for n in attr_name)
    c.execute('DELETE FROM {} WHERE {}'.format(table_name, del_cond),
              tuple(attr_val))
    fina_row = c.execute(
        'SELECT Count(*) FROM {}'.format(table_name)).fetchone()[0]
    conn.commit()
    conn.close()
    filef.set_f_perm(db_fullname, perm)
    if not silent:
        printf('success: delete {} entries from {}',
               orig_row - fina_row, table_name, type='WARN')