def _get_config(cusid, tech, date, mod_name):
    if type(date) is datetime.date:
        ymd = '{:%Y%m%d}'.format(date)
        cf = ETL.get_computed_config(cusid, tech, mod_name)
        sfx = str(pathlib.Path(cusid).joinpath(tech).joinpath(ymd))
        cfgbase = cf[RESTRICT.CONFIG_PATH].joinpath(
            cf[RESTRICT.CONFIG_FOLDER]).joinpath(sfx)
        dpbase = cf[RESTRICT.DATA_PATH].joinpath(sfx)
        return cfgbase, dpbase

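# Usage sketch (values illustrative, not from this repo): both returned paths
# end in the <cusid>/<tech>/<YYYYMMDD> suffix built above.
#
#   cfgbase, dpbase = _get_config('CUST01', 'LTE', datetime.date(2020, 1, 31), __name__)
#   # cfgbase -> <CONFIG_PATH>/<CONFIG_FOLDER>/CUST01/LTE/20200131
#   # dpbase  -> <DATA_PATH>/CUST01/LTE/20200131
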
def get_all_columns(cusid, tech, date, CAT, mod_name):
    if type(date) is datetime.date:
        dppath = ETL.get_computed_config(cusid, tech, __name__)[RESTRICT.DATA_PATH]
        tmppath = dppath.joinpath('{}/{}/{:%Y%m%d}/tmp'.format(cusid, tech, date))
        LRC_ow_ta_columns = dict()
        for LRC, z1, f1 in extract_info(cusid, tech, date, mod_name):
            LRC_ow_ta_columns[LRC] = DS_DBColumns.get_ow_ta_columns(
                z1, f1, tmppath, CAT)
        return LRC_ow_ta_columns

def gen_all_columns(cusid, tech, date, CAT, mod_name):
    if type(date) is datetime.date:
        dppath = ETL.get_computed_config(cusid, tech, __name__)[RESTRICT.DATA_PATH]
        tmppath = dppath.joinpath('{}/{}/{:%Y%m%d}/tmp'.format(cusid, tech, date))
        for _, z1, f1 in extract_info(cusid, tech, date, mod_name):
            columns = _get_columns(z1, f1, tmppath, CAT)
            for owner, tblname in columns:
                yield owner, tblname, columns[owner, tblname]

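# Call-pattern sketch (assumed, mirroring the two functions above):
# get_all_columns materialises the whole {LRC: ow_ta_columns} mapping up
# front, while gen_all_columns streams one (owner, tblname, columns) triple
# at a time and so never holds every archive's column map in memory at once.
#
#   for owner, tblname, cols in gen_all_columns(cusid, tech, date, CAT, __name__):
#       process(owner, tblname, cols)   # process() is a hypothetical consumer
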
    return columns3, cols_order1


# ## function chkcol(...)
def chkcol(cusid, tech, date, CAT):
    if type(date) is datetime.date:
        logger(__name__).info('checking column {date} {case}'.format(
            date=date, case=CAT))
        dsconf = customer.get_default_DS_config_all(cusid, tech)
        dppath = ETL.get_computed_config(cusid, tech, __name__)[RESTRICT.DATA_PATH]
        dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[CAT]
        s = [(l, z, f) for l, z, f in Common.extract_info(
            cusid, tech, date, CAT, __name__)]
        LRCs = set([l for l, _, _ in s])
        zfs = set([(z, f,
                    _get_owner(CAT, z, dsconf[RESTRICT.ZIP_FLT][CAT]),
                    _get_table_name(f, dsconf[RESTRICT.CSV_FLT][CAT]))
                   for _, z, f in s])
        LRC_ow_ta_columns = DSColumn.get_all_columns(cusid, tech, date, CAT, __name__)
        synthcols = _get_synthesized_columns(LRC_ow_ta_columns, zfs, LRCs)
        workpath = dppath.joinpath(
            '{cusid}/{tech}/{d:%Y%m%d}/tmp/{cat}/chkcol'.format(
                cusid=cusid, tech=tech, d=date, cat=CAT))

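# Shape sketch (an assumption read off the comprehensions above): each zfs
# entry is a (zipname, csvname, owner, tblname) 4-tuple, e.g.
#
#   ('SOME_ARCHIVE.zip', 'SOME_TABLE.csv', 'owner_x', 'SOME_TABLE')
#
# and LRCs collects the distinct LRC identifiers seen for that day.
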
def check(cusid, tech, date, date_to):
    if type(date) is datetime.date and type(date_to) is datetime.date:
        logger(__name__).info('checking CM delta {} from {}'.format(
            date, date_to))
        conf = ETL.get_computed_config(cusid, tech, __name__)
        dpbase = conf[RESTRICT.DATA_PATH].joinpath('{}/{}/{:%Y%m%d}'.format(
            cusid, tech, date))
        tfdrpath = dpbase.joinpath('tmp')
        cc = Column.get_check_columns(None, cusid, tech, RESTRICT.CORE, __name__)
        df = '{:%Y-%m-%d}'
        preficies = system_key[RESTRICT.CATEGORY]
        # key and audit columns, excluded from the value comparison
        excluded_columns = set(
            ['OBJ_GID', 'NBR', 'LAST_MODIFIED', 'LAST_MODIFIER'])
        # rows are matched on OBJ_GID (plus NBR for list tables)
        for LRC, z1, f1, z2, f2 in Common.extract_info_pair(
                date, date_to, cusid, tech, _cat, __name__):
            tblname, is_list_tblname = _csv_filename_to_tblname(f1)
            if tblname not in cc:
                continue
            outpath = Common.get_output_path(
                'C_LTE_CMDLTE_DELTA_CHECK_{date:%Y%m}'.format(date=date),
                f1, cusid, tech, date, _cat1, __name__)
            Common.extract_file(z1, f1, tfdrpath, __name__)
            Common.extract_file(z2, f2, tfdrpath, __name__)
            # index the reference day's rows
            curdict = dict()
            tfpath = tfdrpath.joinpath(f2)
            with open(str(tfpath), 'r') as fo:
                reader = csv.DictReader(fo, delimiter=RESTRICT.DELIMITER)
                for line in reader:
                    if is_list_tblname:
                        curdict[line['OBJ_GID'], line['NBR']] = line
                    else:
                        curdict[line['OBJ_GID']] = line
            tfpath.unlink()
            tfpath = tfdrpath.joinpath(f1)
            lines = list()
            with open(str(tfpath), 'r') as fo:
                reader = csv.DictReader(fo, delimiter=RESTRICT.DELIMITER)
                # keep only rows present on both days
                if is_list_tblname:
                    e = [line for line in reader
                         if (line['OBJ_GID'], line['NBR']) in curdict]
                else:
                    e = [line for line in reader
                         if line['OBJ_GID'] in curdict]
                for line in e:
                    t = line
                    if is_list_tblname:
                        y = curdict[line['OBJ_GID'], line['NBR']]
                    else:
                        y = curdict[line['OBJ_GID']]
                    if t != y:
                        # report every changed non-excluded column
                        for col in [x for x in line.keys()
                                    if x not in excluded_columns]:
                            if col in t and col in y and t[col] != y[col]:
                                strtdate = df.format(date)
                                strydate = df.format(date_to)
                                new_line = RESTRICT.DELIMITER.join([
                                    strtdate, strydate, strtdate,
                                    str(preficies[LRC, 'CTP']) + line['OBJ_GID'],
                                    line['NBR'] if 'NBR' in line else '0',
                                    col, y[col], t[col]])
                                lines.append(new_line)
            tfpath.unlink()
            if lines != list():
                Folder.create(outpath.parent, __name__)
                with open(str(outpath), 'w') as fo:
                    fo.write('TIME;DateY;DateT;_id;NBR;Field;ParamY;ParamT\n')
                    i = -1
                    for i, ln in enumerate(lines):
                        fo.write(ln)
                        fo.write('\n')
                logger(__name__).info('written: "{}"'.format(outpath))
                logger(__name__).info('{} line{}'.format(
                    i + 1, '' if i < 1 else 's'))
            del curdict

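# Output sketch: the header below is written verbatim by check(); the data
# row is illustrative only (one changed column for one object, old value
# under ParamY, new value under ParamT).
#
#   TIME;DateY;DateT;_id;NBR;Field;ParamY;ParamT
#   2020-01-31;2020-01-30;2020-01-31;123456789;0;ADMINISTRATIVESTATE;1;0
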
def chkcol(cusid, tech, date, load=False):
    if type(date) is datetime.date:
        logger(__name__).info('checking column {} FM'.format(date))
        ymd = '{:%Y%m%d}'.format(date)
        dppath = ETL.get_computed_config(cusid, tech, __name__)[RESTRICT.DATA_PATH]
        database = DB.get_computed_config(cusid, tech, __name__)[RESTRICT.FM][RESTRICT.DB]
        advpath = dppath.joinpath('{}/{}/{}/columns/check/{}.sql'.format(
            cusid, tech, ymd, database))
        File.remove(advpath, __name__)
        owner_tables = Common.get_owner_tables(cusid, tech, date, _cat, __name__)
        cols = DSColumn.extract_columns(cusid, tech, date, _cat,
                                        owner_tables, __name__)
        for tblname1 in cols:
            tblname = '{}_{:%Y%m}'.format(tblname1, date)
            dbcols = DB.get_columns(cusid, tech, _cat, tblname, __name__)
            new = dict()
            add = dict()
            alter = dict()
            if dbcols == dict():
                # table does not exist in the DB yet
                new[tblname] = cols[tblname1]
            else:
                for col in cols[tblname1]:
                    if col not in dbcols:
                        add[col] = cols[tblname1][col]
                    elif not DSColumn.type_equal(cols[tblname1][col], dbcols[col]):
                        alter[col] = cols[tblname1][col]
            if new != dict() or add != dict() or alter != dict():
                profile = list()
                if new != dict():
                    profile.append('create')
                if add != dict():
                    profile.append('add column')
                if alter != dict():
                    profile.append('change column')
                logger(__name__).info('FM table {}: {}'.format(tblname, profile))
                advpath.touch()
                with open(str(advpath), 'a') as fo:
                    fo.write('use {};\n'.format(database))
                    if new != dict():
                        sql = DSColumn.to_sql(create=new)
                        if load:
                            Common.just_run_sql(cusid, tech, _cat, sql, __name__)
                        fo.write('{};\n'.format(sql))
                    if add != dict():
                        fo.write('{};\n'.format(
                            DSColumn.to_sql(tblname=tblname, add_column=add)))
                    if alter != dict():
                        fo.write('{};\n'.format(
                            DSColumn.to_sql(tblname=tblname, change_column=alter)))
        if advpath.exists():
            logger(__name__).info('advice: "{}"'.format(str(advpath)))

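# Sketch of the resulting advice file (the statements themselves come from
# DSColumn.to_sql, so the DDL below is hypothetical):
#
#   use fm_db;
#   CREATE TABLE SOME_FM_TABLE_202001 (...);
#   ALTER TABLE SOME_FM_TABLE_202001 ADD COLUMN NEW_COL ...;
#
# Note that with load=True only the CREATE statements are also executed
# immediately via Common.just_run_sql; ADD/CHANGE COLUMN statements are only
# written to the advice file.
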
def _init_datapath(cusid, tech, datapath):
    # fall back to the configured DATA_PATH when no explicit path is given
    if datapath is None:
        conf = ETL.get_computed_config(cusid, tech, __name__)
        return conf[RESTRICT.DATA_PATH]
    else:
        return datapath
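
# Usage sketch (illustrative): callers pass an optional override path.
#
#   dp = _init_datapath('CUST01', 'LTE', None)          # -> conf[RESTRICT.DATA_PATH]
#   dp = _init_datapath('CUST01', 'LTE', custom_path)   # -> custom_path unchanged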