Esempio n. 1
0
def drop_col(table, *col_drop, db_name=DB_NAME, db_path=DB_DIR_PARENT):
    """
    very basic function for dropping column(s) in a sqlite db table.
    --> reason for this: written for sqlite without a usable DROP COLUMN,
        so the table is rebuilt instead: the remaining columns are copied
        into a temporary table `_bk`, the original table is dropped and
        `_bk` is renamed to the original name.

    ARGUMENTS:
        table       name of the table to modify
        col_drop    name(s) of the column(s) to remove; names that are not
                    columns of the table are ignored
        db_name     file name of the database
        db_path     directory containing the database file

    RAISES:
        TypeError   if no column would remain after the drop

    NOTE: the rebuilt table is declared only from what get_attr_info reports
          per column (presumably the declared type), so anything else attached
          to the original table definition is not carried over.
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    perm = os.stat(db_fullpath).st_mode
    filef.set_f_perm(db_fullpath, '0666')
    try:
        conn = sqlite3.connect(db_fullpath)
        try:
            c = conn.cursor()
            table = surround_by_brackets(table)
            col_dict = get_attr_info(table, c=c)
            dropped = set(surround_by_brackets(col_drop))
            # A list (not a set) keeps the columns in the order reported by
            # get_attr_info, so the rebuilt table keeps its column order.
            remain_keys = [key for key in col_dict.keys()
                           if key not in dropped]
            if not remain_keys:
                raise TypeError(
                    'cannot drop every column of table {}'.format(table))
            remain_keys_str = ', '.join(
                '{}'.format(key) for key in remain_keys)
            # Every remaining column, including the first one, is declared
            # together with its entry from col_dict.
            remain_attr_str = ', '.join(
                '{} {}'.format(key, col_dict[key]) for key in remain_keys)
            statements = [
                'CREATE TABLE _bk ({});'.format(remain_attr_str),
                'INSERT INTO _bk SELECT {} from {};'.format(
                    remain_keys_str, table),
                'DROP TABLE {};'.format(table),
                'ALTER TABLE _bk RENAME TO {};'.format(table),
            ]
            for cmd_ in statements:
                printf(cmd_)
                c.execute(cmd_)
            conn.commit()
        finally:
            conn.close()
    finally:
        # always put the original permission back, even after a failure
        filef.set_f_perm(db_fullpath, perm)
    printf('successfully drop column(s): {}', col_drop)
Esempio n. 2
0
def sanity_last_n_commit(*table,
                         num_run=1,
                         db_name=DB_NAME,
                         db_path=DB_DIR_PARENT,
                         time_attr=TIME_ATTR):
    """
    delete the entries with the latest populate_time, for all tables with the time attr

    ARGUMENTS:
        table       if table=(), then delete entries for all tables, otherwise only delete for that in *table
        num_run     delete entries with the last (num_run) populate time;
                    clamped to the range [0, number of distinct time values]
        db_name     file name of the database
        db_path     directory containing the database file
        time_attr   the name of the time attribute

    The distinct time values are collected over all selected tables that
    contain time_attr, sorted, and the last num_run of them are deleted from
    each of those tables through sanity_db. Tables without time_attr are
    skipped and reported with a warning.
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    conn = sqlite3.connect(db_fullpath)
    try:
        c = conn.cursor()
        if len(table) == 0:
            names = c.execute(
                'SELECT name FROM sqlite_master WHERE type=\'table\''
            ).fetchall()
            table = ['[{}]'.format(row[0]) for row in names]
        else:
            table = ['[{}]'.format(name) for name in table]
        time_attr = surround_by_brackets(time_attr)
        # get_attr_info(enclosing=False) is compared against the bare name,
        # so strip the brackets (the same bare name is handed to sanity_db).
        bare_attr = time_attr[1:-1]
        # filter table list to those that actually contain the time attr
        table_flt = [
            tbl for tbl in table
            if bare_attr in get_attr_info(
                tbl, enclosing=False, db_fullpath=db_fullpath).keys()
        ]
        time_set = set()
        for tbl in table_flt:
            rows = c.execute(
                'SELECT DISTINCT {} FROM {}'.format(time_attr, tbl)
            ).fetchall()
            time_set.update(row[0] for row in rows)
    finally:
        conn.close()
    time_len = len(time_set)
    # never ask for more runs than exist (also correct when time_len is 0)
    num_run = max(0, min(num_run, time_len))
    time_list = sorted(time_set)[time_len - num_run:]
    for tbl in table_flt:
        for t in time_list:
            sanity_db(bare_attr,
                      t,
                      tbl[1:-1],
                      db_name=db_name,
                      db_path=db_path)

    printf('Done: cleared last {} commits for {}'.format(num_run, table_flt))
    bad_table = set(table) - set(table_flt)
    if bad_table:
        printf('tables {} don\'t have attr {}',
               bad_table,
               time_attr,
               type='WARN')
Esempio n. 3
0
def load_as_array(db_fullpath, table, attr_list, size=-1, c=None):
    """
    load data from db as numpy array
    attr name in attr_list may or may not be surrounded by '[' and ']'

    ARGUMENTS:
        db_fullpath     full path of the db file
        table           table to read from
        attr_list       attributes (columns) to select
        size            max number of rows to load; -1 loads as many rows as
                        count_entry reports for the table
        c               optional existing cursor; when omitted a connection is
                        opened for this call and closed before returning

    RETURNS:
        np.array built from the fetched rows

    NOTE: asking for more rows than the table holds only logs an error; the
          rows that do exist are still returned.
    """
    conn = None
    if c is None:
        conn = sqlite3.connect(db_fullpath)
        c = conn.cursor()
    try:
        attr_list = surround_by_brackets(attr_list)
        table = surround_by_brackets(table)
        tot_num = count_entry(db_fullpath, table, c=c)
        if size == -1:
            size = tot_num
        if tot_num < size:
            printf('db don\'t have enough entries to load!', type='ERROR')
        c.execute('SELECT {} FROM {} LIMIT {}'.format(','.join(attr_list),
                                                      table, size))
        return np.array(c.fetchall())
    finally:
        # only close what this call opened; a caller-supplied cursor is left alone
        if conn is not None:
            conn.close()
Esempio n. 4
0
def export_to_text(table, out_file, order_by, *col, db_name=DB_NAME, db_path=DB_DIR_PARENT):
    """
    export data entries from a sqlite3 db into a text file
        table           the table to read from
        out_file        file the text is written to (via ff.print_to_file)
        order_by        output entries order by the value in this column
        *col            the columns in table to be exported
        db_name         file name of the database
        db_path         directory containing the database file

    Only DISTINCT rows are exported. Each row becomes one line, with its
    values separated by ', '. Leading / trailing whitespace of the whole text
    is stripped before writing.
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    conn = sqlite3.connect(db_fullpath)
    try:
        c = conn.cursor()
        table = surround_by_brackets(table)
        order_by = surround_by_brackets(order_by)
        col_str = ','.join(surround_by_brackets(_) for _ in col)
        retrieved = c.execute(
            'SELECT DISTINCT {cols} FROM {table} ORDER BY {ob}'.format(
                cols=col_str, table=table, ob=order_by)).fetchall()
    finally:
        conn.close()
    # join builds the text in one pass instead of re-copying it for every row
    s = '\n'.join(
        ', '.join(str(value) for value in row) for row in retrieved)
    s = s.strip()
    ff.print_to_file(out_file, s, type=None, log_dir='./')
    printf('retrieved data into {}', out_file)
Esempio n. 5
0
def normalize_col(table,
                  col,
                  group_by_key,
                  db_name=DB_NAME,
                  db_path=DB_DIR_PARENT):
    """
    add a REAL column named '<col>_norm' to table and fill it with the value
    of col divided by the max of col within the same group.

    ARGUMENTS:
        table           the table to modify
        col             the column to normalize
        group_by_key    rows sharing the same value in this column form a
                        group; each group is normalized by its own max
        db_name         file name of the database
        db_path         directory containing the database file

    RAISES:
        ZeroDivisionError   if the max of a group is 0

    Rows whose col is NULL (and groups whose max is NULL) are left with a
    NULL normalized value.
    """
    db_fullpath = '{}/{}'.format(db_path, db_name)
    perm = os.stat(db_fullpath).st_mode
    filef.set_f_perm(db_fullpath, '0666')
    try:
        conn = sqlite3.connect(db_fullpath)
        try:
            c = conn.cursor()
            table = surround_by_brackets(table)
            group_by_key = surround_by_brackets(group_by_key)
            col_norm = surround_by_brackets('{}_norm'.format(col))
            col = surround_by_brackets(col)
            c.execute('ALTER TABLE {table} ADD COLUMN {col_norm} REAL'.format(
                table=table, col_norm=col_norm))
            k_list = [row[0] for row in c.execute(
                'SELECT DISTINCT {filt} FROM {table}'.format(
                    filt=group_by_key, table=table)).fetchall()]
            # Values are bound with '?' rather than pasted into the SQL text,
            # so text keys need no quoting. 'IS' behaves like '=' but also
            # matches NULL against NULL.
            max_sql = 'SELECT max({col}) FROM {table} WHERE {filt} IS ?'.format(
                col=col, table=table, filt=group_by_key)
            val_sql = ('SELECT DISTINCT {col} FROM {table} '
                       'WHERE {filt} IS ?').format(
                           col=col, table=table, filt=group_by_key)
            upd_sql = ('UPDATE {table} SET {col_norm} = ? '
                       'WHERE {filt} IS ? AND {col} IS ?').format(
                           table=table, col_norm=col_norm,
                           filt=group_by_key, col=col)
            for k in k_list:
                m = c.execute(max_sql, (k,)).fetchone()[0]
                if m is None:
                    # the group holds only NULLs: nothing to normalize
                    continue
                m = float(m)
                # fetch everything first: the same cursor runs the UPDATEs
                d_list = [row[0]
                          for row in c.execute(val_sql, (k,)).fetchall()]
                for d in d_list:
                    if d is None:
                        continue
                    c.execute(upd_sql, (d / m, k, d))
            conn.commit()
        finally:
            conn.close()
    finally:
        # always put the original permission back, even after a failure
        filef.set_f_perm(db_fullpath, perm)
    printf('successfully normalize column: {}', col)
Esempio n. 6
0
def add_col(table,
            col_add_name,
            col_add_type,
            f_lambda,
            *dependencies,
            db_name=DB_NAME,
            db_path=DB_DIR_PARENT):
    """
    add a column to table whose value is computed from existing columns.

    ARGUMENTS:
        table               the table to modify
        col_add_name        name of the new column
        col_add_type        sqlite type of the new column
        f_lambda            called as f_lambda(*dependency_values) for every
                            row; its return value is stored in the new column.
                            Return the plain Python value (no SQL quoting).
        dependencies        col in the original table
                            should pass dependencies to f_lambda
        db_name             file name of the database
        db_path             directory containing the database file

    RAISES:
        TypeError           if no dependency column is given
        AssertionError      if a dependency is not a column of the table

    NOTE: a row is located by the values of its dependency columns, so rows
          that agree on all dependencies end up with the same new value.
    """
    if not dependencies:
        raise TypeError('add_col needs at least one dependency column')
    db_fullpath = '{}/{}'.format(db_path, db_name)
    perm = os.stat(db_fullpath).st_mode
    filef.set_f_perm(db_fullpath, '0666')
    try:
        conn = sqlite3.connect(db_fullpath)
        try:
            c = conn.cursor()
            table = surround_by_brackets(table)
            col_add_name = surround_by_brackets(col_add_name)
            dependencies = list(surround_by_brackets(dependencies))
            dp_str = ', '.join(dependencies)
            col_dict = get_attr_info(table, c=c)
            assert set(dependencies).issubset(set(col_dict.keys()))
            c.execute('ALTER TABLE {table} ADD COLUMN {col} {type}'.format(
                table=table, col=col_add_name, type=col_add_type))
            dp_list = c.execute('SELECT {dp} FROM {table}'.format(
                dp=dp_str, table=table)).fetchall()
            # Values are bound with '?' instead of being pasted into the SQL
            # text, so text values need no quoting. 'IS' behaves like '=' but
            # also matches NULL against NULL.
            cond = ' AND '.join('{} IS ?'.format(dep) for dep in dependencies)
            upd_sql = 'UPDATE {table} SET {col} = ? WHERE {cond}'.format(
                table=table, col=col_add_name, cond=cond)
            for dp in dp_list:
                ret = f_lambda(*dp)
                if hasattr(ret, 'item'):
                    # numpy-style scalar -> builtin value that sqlite3 can bind
                    ret = ret.item()
                c.execute(upd_sql, (ret,) + tuple(dp))
            conn.commit()
        finally:
            conn.close()
    finally:
        # always put the original permission back, even after a failure
        filef.set_f_perm(db_fullpath, perm)
    printf('successfully add column: {}', col_add_name)
Esempio n. 7
0
def populate_db(attr_name,
                attr_type,
                *d_tuple,
                db_path=DB_DIR_PARENT,
                db_name=DB_NAME,
                table_name=DB_TABLE,
                append_time=True,
                usr_time=None,
                perm='default',
                silent=False):
    """
    populate data into database, with user defined schema
    optionally append the time to each data tuple.
    Policy on existing data:
        This function will never drop existing data. It will only append to
        table if it already exists. So this is a safe operation.
        To delete entries, call sanity_db.

    ARGUMENTS:
        attr_name       list of attribute name in database
        attr_type       type of attr: e.g.: INTEGER, TEXT, REAL...
        d_tuple         arbitrary num of arguments that consist of the tuple
                        can be 1D or 2D: if 1D, expand it to 2D
                        (scalars and 1D inputs are repeated on every row)
        db_path         path of database
        db_name         name of database
        table_name      table name
        append_time     prepend a timestamp column (TIME_ATTR, TEXT) to each
                        data tuple if set true
        usr_time        timestamp to use instead of the current time
        perm            permission of file. Refer to logf.filef for details
                        'default' keeps the mode of an existing file and uses
                        '0444' for a newly created one
        silent          won't log info after successful population if set True

    RAISES:
        ValueError      if no data argument is given
        AssertionError  if an input has more than 2 dimensions, 2D inputs
                        disagree on the number of rows, or attr_name and
                        attr_type differ in length
    """
    if not d_tuple:
        raise ValueError('populate_db needs at least one data argument')
    db_fullname = '{}/{}'.format(db_path, db_name)
    filef.mkdir_r(os.path.dirname(db_fullname))
    # file permission policy
    db_exists = os.path.exists(db_fullname)
    if perm == 'default':
        perm = os.stat(db_fullname).st_mode if db_exists else '0444'
    if db_exists:
        filef.set_f_perm(db_fullname, '0666')

    # make sure the db file exists without truncating it
    open(db_fullname, 'a').close()
    try:
        # set-up: the row count is given by the 2D inputs (all must agree)
        arrays = [array(d) for d in d_tuple]
        num_tuples = -1
        for d_arr in arrays:
            assert len(d_arr.shape) <= 2
            if len(d_arr.shape) == 2:
                if num_tuples == -1:
                    num_tuples = d_arr.shape[0]
                assert num_tuples == d_arr.shape[0]
        if num_tuples == -1:  # only one tuple
            num_tuples = 1
        d_fulltuple = None
        for d, d_arr in zip(d_tuple, arrays):
            if len(d_arr.shape) == 0:
                d_arr = array([d])
            if len(d_arr.shape) == 1:
                d_arr = d_arr.reshape(1, d_arr.size)
                d_arr = repeat(d_arr, num_tuples, axis=0)
            # concatenate one input at a time: the resulting dtype (and thus
            # the text form of the values) depends on this pairwise order
            if d_fulltuple is None:
                d_fulltuple = d_arr
            else:
                d_fulltuple = concatenate((d_fulltuple, d_arr), axis=1)

        if append_time:
            attr_name = [TIME_ATTR] + list(attr_name)
            attr_type = ['TEXT'] + list(attr_type)
            time_str = usr_time or strftime(TIME_FORMAT)
            time_col = array([time_str] * num_tuples) \
                    .reshape(num_tuples, 1)
            d_fulltuple = concatenate((time_col, d_fulltuple), axis=1)
        # sqlite3
        conn = sqlite3.connect(db_fullname)
        try:
            c = conn.cursor()
            table_name = surround_by_brackets(table_name)
            assert len(attr_name) == len(attr_type)
            create_clause = ', '.join(
                '[{}] {}'.format(name, type_)
                for name, type_ in zip(attr_name, attr_type))
            c.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
                table_name, create_clause))
            # every row has the same width, so build the statement once
            placeholders = ', '.join(['?'] * d_fulltuple.shape[1])
            insert_clause = 'INSERT INTO {} VALUES ({})'.format(
                table_name, placeholders)
            # tolist() turns numpy scalars into builtin Python values, which
            # is what sqlite3 parameter binding is documented to accept
            c.executemany(insert_clause, d_fulltuple.tolist())
            # finish up
            conn.commit()
        finally:
            conn.close()
    finally:
        # enforce file permission, even after a failure
        filef.set_f_perm(db_fullname, perm)
    # log
    if not silent:
        printf('success: populate {} entries into table {}',
               num_tuples,
               table_name,
               separator=None)
Esempio n. 8
0
def sanity_db(attr_name,
              attr_val,
              table_name,
              db_name=DB_NAME,
              db_path=DB_DIR_PARENT,
              silent=False):
    """
    remove entries in db file. Can be useful to keep the db clean
    when you do a lot of unstable testing for your project.

    ARGUMENTS:
        attr_name       list: the selection criteria for deleting db entries
                        (a single name is also accepted)
        attr_val        list: the selection value for deleting db entries
                        (a single value is also accepted); a row is deleted
                        when ALL attr_name[i] match attr_val[i]
        table_name      the table in the db file
        db_name
        db_path         the full path of db file is db_path/db_name
        silent          if silent, don't log info after successful deletion

    RAISES:
        TypeError       if no selection attribute is given
        AssertionError  if attr_name and attr_val differ in length

    If the table lacks one of the attributes, an error is logged and nothing
    is deleted.
    """
    # convert arg to list if passing in single int / string
    if len(array(attr_name).shape) == 0:
        attr_name = [attr_name]
    if len(array(attr_val).shape) == 0:
        attr_val = [attr_val]
    db_fullname = '{}/{}'.format(db_path, db_name)
    perm = os.stat(db_fullname).st_mode
    filef.set_f_perm(db_fullname, '0666')
    # don't check file: leave it to user / wrapper function
    try:
        conn = sqlite3.connect(db_fullname)
        try:
            c = conn.cursor()
            table_name = surround_by_brackets(table_name)
            orig_row = count_entry(db_fullname, table_name)
            attr_len = len(attr_name)
            assert attr_len == len(attr_val)
            if attr_len == 0:
                raise TypeError('sanity_db needs at least one attribute')
            # check if the attr passed in is the attr in the db
            db_attr_set = set(
                get_attr_info(table_name,
                              db_fullpath=db_fullname,
                              enclosing=False).keys())
            if not set(attr_name).issubset(db_attr_set):
                printf(
                    'table {} doesn\'t contain some of the following attr: {}',
                    table_name,
                    attr_name,
                    type='ERROR')
                # the finally blocks below still close the connection and
                # restore the file permission
                return
            # Values are bound with '?' so strings need no manual quoting.
            # numpy-style scalars are converted to builtin values first.
            values = [
                val.item() if hasattr(val, 'item') else val
                for val in attr_val
            ]
            # 'IS' behaves like '=' but also lets None select NULL entries
            del_cond = ' and '.join(
                '[{}] IS ?'.format(name) for name in attr_name)
            c.execute('DELETE FROM {} WHERE {}'.format(table_name, del_cond),
                      values)
            fina_row = c.execute(
                'SELECT Count(*) FROM {}'.format(table_name)).fetchone()[0]
            conn.commit()
        finally:
            conn.close()
    finally:
        filef.set_f_perm(db_fullname, perm)
    if not silent:
        printf('success: delete {} entries from {}',
               orig_row - fina_row,
               table_name,
               type='WARN')