Ejemplo n.º 1
0
def just_run_sql(cusid, tech, CAT, SQL, mod_name= __name__):
    """Open a DB connection for (cusid, tech, CAT), run one SQL statement, close.

    ## cusid, tech, CAT: keys for looking up the computed DB configuration
    ## SQL: statement text passed straight through to DB.run_sql
    ## mod_name: module tag forwarded to the config/DB helpers
    """
    dbconf = ETLDB.get_computed_config(cusid, tech, mod_name)[CAT]
    conn = DB.get_connection(dbconf[RESTRICT.HOST],
                             dbconf[RESTRICT.PORT],
                             dbconf[RESTRICT.USER],
                             dbconf[RESTRICT.PSWD],
                             dbconf[RESTRICT.DB],
                             dbconf[RESTRICT.CHARSET], mod_name)
    try:
        #logger(mod_name).debug(SQL)
        DB.run_sql(conn, SQL)
    finally:
        # close even when run_sql raises, so connections are never leaked
        conn.close()
Ejemplo n.º 2
0
def get_check_columns(tblname, cusid, tech, CAT, mod_name):
    """Fetch column metadata used for checking *tblname*.

    ## tblname: None (all tables), a single table name (str), or a list of names
    ## Raises TypeError for any other tblname type — previously this surfaced
    ## as an UnboundLocalError on SQL further down.
    """
    if tblname is None:
        SQL = _get_SQL_check_columns(cusid, tech)
    elif isinstance(tblname, (str, list)):
        SQL = _get_SQL_check_columns(cusid, tech, tblname)
    else:
        raise TypeError('tblname must be None, str or list, got {}'.format(
            type(tblname).__name__))

    dbconf = DB1.get_computed_config(cusid, tech, mod_name)[CAT]
    task_name = 'get columns for checking {}'.format(tblname)
    return DB.get_data_set(dbconf, task_name, SQL, _get_proc_check_columns(),
                           mod_name)
Ejemplo n.º 3
0
def _get_columns(cusid, tech):
    """Return (tblname, columns) for this module's category, minus bookkeeping columns.

    The loader-maintained columns ('__id', '_type', ...) are stripped because
    they are not part of the data-source schema.
    """
    dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[_cat]
    tblname = dbconf[RESTRICT.TABLE]
    columns = ETLDB.get_columns(cusid,
                                tech,
                                _cat,
                                tblname=tblname,
                                mod_name=__name__)
    for c in ('__id', '_type', '_additional1', '_additional2', 'create_date',
              'last_update'):
        # pop with a default: tolerate tables that lack one of the
        # bookkeeping columns (a plain pop(c) raised KeyError here)
        columns.pop(c, None)
    for c in columns:
        print(c, columns[c])
    return tblname, columns
Ejemplo n.º 4
0
        #    logger(__name__).debug(columns3)
        #    for c in columns3:
        #        logger(__name__).debug(c == cols_order1[columns3[c]['order']])
        """
        return columns3, cols_order1

    #

    ## function chkcol(...)
    if type(date) is datetime.date:
        logger(__name__).info('checking column {date} {case}'.format(date=date,
                                                                     case=CAT))
        dsconf = customer.get_default_DS_config_all(cusid, tech)
        dppath = ETL.get_computed_config(cusid, tech,
                                         __name__)[RESTRICT.DATA_PATH]
        dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[CAT]
        s = [(l, z, f) for l, z, f in Common.extract_info(
            cusid, tech, date, CAT, __name__)]
        LRCs = set([l for _, (l, _, _) in enumerate(s)])
        zfs = set([(z, f, _get_owner(CAT, z, dsconf[RESTRICT.ZIP_FLT][CAT]),
                    _get_table_name(f, dsconf[RESTRICT.CSV_FLT][CAT]))
                   for _, (_, z, f) in enumerate(s)])
        LRC_ow_ta_columns = DSColumn.get_all_columns(cusid, tech, date, CAT,
                                                     __name__)
        synthcols = _get_synthesized_columns(LRC_ow_ta_columns, zfs, LRCs)
        workpath = dppath.joinpath(
            '{cusid}/{tech}/{d:%Y%m%d}/tmp/{cat}/chkcol'.format(cusid=cusid,
                                                                tech=tech,
                                                                d=date,
                                                                cat=CAT))
        Folder.create(workpath, __name__)
Ejemplo n.º 5
0
def chkcol(cusid, tech, date, load= False):
    """Diff FM data-source columns against the DB and write SQL advice.

    For each table extracted for *date*, compare the data-source columns with
    the current database columns and append the needed `create table` /
    `add column` / `change column` statements to a per-database .sql advice
    file.  When *load* is True, `create table` statements are also executed
    immediately; add/change statements remain advice-only, as before.
    """
    # exact type check on purpose: datetime.datetime subclasses datetime.date
    # and must not be accepted here
    if type(date) is datetime.date:
        logger(__name__).info('checking column {} FM'.format(str(date)))
        ymd = '{:%Y%m%d}'.format(date)
        dppath = ETL.get_computed_config(cusid, tech, __name__)[RESTRICT.DATA_PATH]

        database = DB.get_computed_config(cusid, tech, __name__)[RESTRICT.FM][RESTRICT.DB]
        advpath = dppath.joinpath('{}/{}/{}/columns/check/{}.sql'.format(cusid, tech, ymd, database))
        # start each run from a clean advice file
        File.remove(advpath, __name__)

        owner_tables = Common.get_owner_tables(cusid, tech, date, _cat, __name__)
        cols = DSColumn.extract_columns(cusid, tech, date, _cat, owner_tables, __name__)

        for tblname1 in cols:

            # DB tables are partitioned by month: <name>_YYYYMM
            tblname = '{}_{:%Y%m}'.format(tblname1, date)
            dbcols = DB.get_columns(cusid, tech, _cat, tblname, __name__)

            new = dict()    # table missing entirely -> create table
            add = dict()    # column missing          -> add column
            alter = dict()  # column type mismatch    -> change column

            if not dbcols:
                new[tblname] = cols[tblname1]
            else:
                for col in cols[tblname1]:

                    if col not in dbcols:
                        add[col] = cols[tblname1][col]

                    elif not DSColumn.type_equal(cols[tblname1][col], dbcols[col]):
                        alter[col] = cols[tblname1][col]

            if new or add or alter:

                profile = list()
                if new:
                    profile.append('create')
                if add:
                    profile.append('add column')
                if alter:
                    profile.append('change column')
                logger(__name__).info('FM table {}: {}'.format(tblname, profile))

                advpath.touch()
                with open(str(advpath), 'a') as fo:

                    fo.write('use {};\n'.format(database))

                    if new:
                        sql = DSColumn.to_sql(create= new)
                        if load:
                            Common.just_run_sql(cusid, tech, _cat, sql, __name__)
                        fo.write('{};\n'.format(sql))

                    if add:
                        fo.write('{};\n'.format(DSColumn.to_sql(tblname= tblname, add_column= add)))

                    if alter:
                        fo.write('{};\n'.format(DSColumn.to_sql(tblname= tblname, change_column= alter)))
                    # the redundant fo.close() was removed: the with-statement
                    # already closes the file

        if advpath.exists():
            logger(__name__).info('advice: "{}"'.format(str(advpath)))
Ejemplo n.º 6
0
                                 c2=c2,
                                 tym=tblname_ym,
                                 g=_build_group_by(agg_case, group_by),
                                 ym=_ym(tblname_ym))

        logger(__name__).debug(sql1)
        #logger(__name__).debug(sql2)
        return sql1, sql2

    if case in _cases:
        datapath = _init_datapath(cusid, tech, datapath)
        casepath = datapath.joinpath('{cusid}/{tech}/{d:%Y%m%d}/{case}'.format(
            cusid=cusid, tech=tech, d=date, case=case))
        logger(__name__).info('aiming path: {path}'.format(path=casepath))

        dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[case]
        database = dbconf[RESTRICT.DB]

        i = -1
        for i, folder in enumerate(
            [x for x in casepath.glob('*') if x.is_dir()]):
            #logger(__name__).debug(folder)

            owner, tblname, tblname_ym = folder.parent.name, _tblname_without_ym(
                folder.name), folder.name
            #logger(__name__).debug((cusid, tech, owner, tblname_ym, __name__))
            dbcols = set(
                ETLDB.get_columns(cusid, tech, owner, tblname_ym,
                                  __name__).keys())
            aggcols_spec = ETLDB.get_agg_rules(cusid, tech, owner, tblname,
                                               __name__)
Ejemplo n.º 7
0
def load(DDL_proc, SQL_gen_proc, date, cusid, tech, CAT, mod_name):
    """Load every pending .txt data file under the CAT folder into its table.

    ## DDL_proc: lambda (owner, tblname) -> SQL
    ##- tblname: table name in database
    ## SQL_gen_proc: lambda (sqlformat, lines) -> SQL
    ## - sqlformat: 'insert into some_table ({columns}) values {values};'
    ## - lines: a list of { column: value }

    Files whose history symbol already exists are skipped.  All inserts for a
    file run inside one transaction; failures are logged (best-effort, as
    before) and the next file is processed.
    """
    if Util.is_function(DDL_proc) and Util.get_arity(DDL_proc) == 2 and\
       Util.is_function(SQL_gen_proc) and Util.get_arity(SQL_gen_proc) == 4:
        cfgbase, dpbase = _get_config(cusid, tech, date, mod_name)
        dbconf_base = ETLDB.get_computed_config(cusid, tech, mod_name)
        # category config is the fallback when an owner has no dedicated raw DB
        dbconf1 = dbconf_base[CAT]
        dbconfs = {owner: dbconf_base[owner]
                   for owner in (PCOFNSRAW, PCOFNGRAW, IMSCSFRAW, IMSHSSRAW,
                                 MADNHRRAW, MADODCRAW, IMSDRARAW, XMLNSSRAW,
                                 NOKOBWRAW, NOKOMWRAW, NOKIUMRAW)}
        hsformat = str(cfgbase.joinpath('history/{cat}.{tblname}.{filestem}.upload'))
        fdrpath = dpbase.joinpath(CAT)
        for p in fdrpath.glob('*/*.txt'):
            owner = _get_owner(p.parent.parent.name)
            tblname = p.parent.name
            hspath = pathlib.Path(hsformat.format(cat= CAT, tblname= tblname, filestem= p.stem))
            if _has_symbol(hspath):
                # history symbol present: this file was already uploaded
                continue
            logger(__name__).debug('symbol create: "{}"'.format(hspath))
            #_put_symbol(hspath)
            dbconf = dbconfs.get(owner, dbconf1)
            ddl = DDL_proc(owner, tblname)
            conn = DB.get_connection(dbconf[RESTRICT.HOST],
                                     dbconf[RESTRICT.PORT],
                                     dbconf[RESTRICT.USER],
                                     dbconf[RESTRICT.PSWD],
                                     dbconf[RESTRICT.DB],
                                     dbconf[RESTRICT.CHARSET], mod_name)
            try:
                # max_allowed_packet bounds the size of each generated INSERT
                block_size = DB.get_variable(conn, 'max_allowed_packet')
                DB.run_sql(conn, ddl)
                cursor = conn.cursor()
                try:
                    cursor.execute('start transaction')
                    for sql in _gen_SQL_list(SQL_gen_proc, owner, tblname, p, block_size):
                        sql1 = re.sub(r'(\r\n|\n)', ' ', sql).strip()
                        sql2 = sql1[:512]
                        logger(__name__).debug('run: length {}; "{}{}"'.format(
                            len(sql1), sql2, '...' if len(sql1) > len(sql2) else ''))
                        cursor.execute(sql)
                    cursor.connection.commit()
                except Exception:
                    # best-effort per file: log the failure and move on
                    traceback.print_exc()
                finally:
                    cursor.close()
            finally:
                # previously `conn` leaked if get_variable/run_sql raised,
                # and `cursor` could be unbound in the old finally clause
                conn.close()
Ejemplo n.º 8
0
        return "delete from `{database}`.`{table_to}`; insert into `{database}`.`{table_to}` select * from `{database}`.`{table_from}` where date(`TIME`) = '{d:%Y-%m-%d}'".format(
            database=dbname,
            table_to='{}_latest'.format(tblname),
            table_from='{}_{:%Y%m}'.format(tblname, date),
            d=date)

    #

    ## function chkcol(...)
    if type(date) is datetime.date:
        logger(__name__).info('aggregate {date} {case}'.format(date=date,
                                                               case=CAT))
        dsconf = customer.get_default_DS_config_all(cusid, tech)
        dppath = ETL.get_computed_config(cusid, tech,
                                         __name__)[RESTRICT.DATA_PATH]
        dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[CAT]
        s = [(l, z, f) for l, z, f in Common.extract_info(
            cusid, tech, date, CAT, __name__)]
        #LRCs = set([ l for _, (l, _, _) in enumerate(s) ])
        zfs = set([(z, f, _get_owner(CAT, z, dsconf[RESTRICT.ZIP_FLT][CAT]),
                    _get_table_name(f, dsconf[RESTRICT.CSV_FLT][CAT]))
                   for _, (_, z, f) in enumerate(s)])
        #LRC_ow_ta_columns = DSColumn.get_all_columns(cusid, tech, date, CAT, __name__)
        #synthcols = _get_synthesized_columns(LRC_ow_ta_columns, zfs, LRCs)
        DB_creating = set()
        except_cols = set([
            '_ID', 'LRC', 'TIME', 'PERIOD_START_TIME', 'LAST_MODIFIED',
            'LAST_MODIFIER'
        ])
        workpath = dppath.joinpath(
            '{cusid}/{tech}/{d:%Y%m%d}/tmp/{cat}/agg'.format(cusid=cusid,