def _build_lookup_list():
    """Assemble the CDD lookup table from the exported meta CSVs and the custom file.

    Every file matching ``*_CDD*csv`` in ``cfg['informer']['export_path_meta']``
    is read with all columns as strings and stacked into one frame. Rows whose
    ``ids`` value also appears in the custom meta CSV
    (``cfg['ccdw']['meta_custom']``) are removed and the custom rows, sorted
    by ``ids``, are appended in their place. Null cells are replaced by ``None``.

    Returns:
        The merged lookup DataFrame with a fresh 0..n-1 index.

    Raises:
        ValueError: raised by ``pd.concat`` when no CDD file matches the pattern.
    """
    # NOTE(review): the "ansi" codec name is presumably only available on
    # Windows builds of Python -- confirm this code never runs elsewhere.
    meta_path = cfg['informer']['export_path_meta']
    cdd_files = glob.glob(os.path.join(meta_path, "*_CDD*csv"))
    exported = pd.concat(
        (pd.read_csv(path, encoding="ansi", dtype='str') for path in cdd_files),
        ignore_index=True,
    )

    custom = pd.read_csv(cfg['ccdw']['meta_custom'], encoding="ansi", dtype='str')
    custom = custom.sort_values(by='ids')

    # A boolean mask drops *every* exported row that the custom file overrides.
    # The previous list.index() lookup only found the first occurrence of an
    # id, so duplicates coming from several CDD files survived the override.
    overridden = exported['ids'].isin(custom['ids'])

    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    merged = pd.concat([exported[~overridden], custom], ignore_index=True)

    # 'ids' deliberately stays a regular column: to_sql(index=False) below
    # would otherwise not write it. (The old set_index('ids') calls discarded
    # their result and therefore never changed anything.)
    return merged.where(pd.notnull(merged), None)


def _push_lookup_list(frame):
    """Replace the contents of the ``meta.CDD`` table with the rows of *frame*."""

    def report(message):
        # Progress goes both to the console and to the debug log.
        print(message)
        logger.debug(message)

    report("Update CDD in meta")
    cdd_engine = export.engine()

    report("...delete old data")
    cdd_engine.execute('DELETE FROM meta.CDD')

    report("...push new data")
    # The long-removed `flavor` keyword is no longer passed; the remaining
    # omitted keywords (index_label, chunksize) were at their defaults.
    frame.to_sql('CDD', cdd_engine, schema='meta', if_exists='append', index=False)

    report("...Update CDD in meta [DONE]")


def loadLookupList(refresh=False):
    """Make sure the module-level ``lookuplist`` DataFrame is loaded.

    The table is built on the first call only; later calls reuse the cached
    global. The global is assigned only after the table has been built
    completely, so a failure while reading the input files does not leave a
    partially merged table behind for later calls.

    Args:
        refresh: when true, the ``meta.CDD`` database table is emptied and
            refilled from ``lookuplist``.
    """
    global lookuplist

    # If lookuplist exists, move along. Otherwise, create it.
    try:
        lookuplist.empty
    except NameError:
        lookuplist = _build_lookup_list()

    # NOTE(review): the original indentation of this file was lost; the refresh
    # is assumed to apply on every call, not only on the first load -- confirm.
    if refresh:
        _push_lookup_list(lookuplist)
['STPR.STATUS', 'STPR.STATUS.DATE', 'STPR.STATUS.CHGOPR'], 'STUDENT_TERMS': ['STTR.STATUS', 'STTR.STATUS.DATE'] } # Extract just the date and time fields status_datetime_fields = {} date_regex = regex.compile('.*\.DATE$|.*\.TIME$') for key in status_fields.keys(): fields = status_fields[key] status_datetime_fields[key] = [f for f in fields if date_regex.match(f)] log = open( "log/log_{0}.txt".format( datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S%f")), "w") engine = export.engine(cfg['sql']['driver'], cfg['sql']['server'], cfg['sql']['db'], cfg['sql']['schema']) kList, dTypes, aTypes, aNames, typers = meta.getDataTypes() def processfile(df, fn, d): print("Updating fn = " + fn + ", d = " + d) columnHeaders = list(df.columns.values) columnArray = np.asarray(columnHeaders) # dTyper is a dictionary of Columns and their types to be passed to executeSQL_UPDATE dTyper = {k: dTypes[k] for k in dTypes.keys() & columnArray} # kLister is a dictionary of keys to be passed to executeSQL_UPDATE kLister = {k: kList[k] for k in kList.keys() & columnArray} aTypesr = {k: aTypes[k] for k in aTypes.keys() & columnArray} aNamesr = {k: aNames[k] for k in aNames.keys() & columnArray}