Exemple #1
0
def loadLookupList(refresh=False):
    """Build the module-level ``lookuplist`` DataFrame of CDD metadata.

    The list is assembled at most once: every ``*_CDD*csv`` file found in
    ``cfg['informer']['export_path_meta']`` is read and concatenated, then
    the rows of the CSV named by ``cfg['ccdw']['meta_custom']`` are laid over
    it, so that a custom row replaces every exported row carrying the same
    ``ids`` value. Missing values are converted to ``None``.

    If ``lookuplist`` is already populated the function returns immediately;
    in that case ``refresh`` has no effect.

    Args:
        refresh: When true and the list is built by this call, delete the
            contents of ``meta.CDD`` and insert the freshly built list.

    Raises:
        ValueError: If no ``*_CDD*csv`` file exists in the meta folder.
    """
    global lookuplist

    # Already loaded: move along. A value of None counts as "not loaded".
    if globals().get('lookuplist') is not None:
        return

    # Read all files in the meta folder
    meta_path = cfg['informer']['export_path_meta']
    all_files = glob.glob(os.path.join(meta_path, "*_CDD*csv"))
    if not all_files:
        raise ValueError("No *_CDD*csv files found in " + str(meta_path))
    exported = pd.concat(
        (pd.read_csv(f, encoding="ansi", dtype='str') for f in all_files),
        ignore_index=True)

    meta_custom = cfg['ccdw']['meta_custom']
    custom = pd.read_csv(meta_custom, encoding="ansi", dtype='str')
    custom = custom.sort_values(by='ids')

    # Drop EVERY exported row whose id is overridden by the custom file.
    # A boolean mask also removes duplicated ids, which a first-match
    # index lookup would leave behind.
    overridden = exported['ids'].isin(custom['ids'])
    merged = pd.concat([exported[~overridden], custom], ignore_index=True)
    merged = merged.where(pd.notnull(merged), None)

    # Publish the cache only once it is complete, so a failure above does
    # not leave a half-built list that later calls would treat as loaded.
    # 'ids' deliberately stays a regular column (it is written to SQL with
    # index=False below).
    lookuplist = merged

    if refresh:
        print("Update CDD in meta")
        logger.debug("Update CDD in meta")
        cddEngine = export.engine()
        print("...delete old data")
        logger.debug("...delete old data")
        cddEngine.execute('DELETE FROM meta.CDD')
        print("...push new data")
        logger.debug("...push new data")
        # The legacy 'flavor' argument is no longer accepted by to_sql.
        lookuplist.to_sql('CDD',
                          cddEngine,
                          schema='meta',
                          if_exists='append',
                          index=False)
        print("...Update CDD in meta [DONE]")
        logger.debug("...Update CDD in meta [DONE]")
    ['STPR.STATUS', 'STPR.STATUS.DATE', 'STPR.STATUS.CHGOPR'],
    'STUDENT_TERMS': ['STTR.STATUS', 'STTR.STATUS.DATE']
}

# Extract just the date and time fields: for each key of status_fields keep
# only the field names that end in ".DATE" or ".TIME".
status_datetime_fields = {}
# Raw string so that "\." reaches the regex engine as an escaped dot instead
# of being parsed as an (invalid) string escape sequence.
date_regex = regex.compile(r'.*\.DATE$|.*\.TIME$')
for key, fields in status_fields.items():
    status_datetime_fields[key] = [f for f in fields if date_regex.match(f)]

# Open a log file under log/ for writing, named with the current timestamp
# (date, time and microseconds). The handle is kept at module level; nothing
# in this span closes it.
log = open(
    "log/log_{0}.txt".format(
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S%f")), "w")

# Create the database engine from the 'sql' section of cfg
# (driver, server, db, schema).
engine = export.engine(cfg['sql']['driver'], cfg['sql']['server'],
                       cfg['sql']['db'], cfg['sql']['schema'])

# Unpack the five values returned by meta.getDataTypes(). processfile indexes
# kList, dTypes, aTypes and aNames by column name, so they are dict-like.
# NOTE(review): the role of `typers` is not visible here -- confirm.
kList, dTypes, aTypes, aNames, typers = meta.getDataTypes()


def processfile(df, fn, d):
    print("Updating fn = " + fn + ", d = " + d)
    columnHeaders = list(df.columns.values)
    columnArray = np.asarray(columnHeaders)

    # dTyper is a dictionary of Columns and their types to be passed to executeSQL_UPDATE
    dTyper = {k: dTypes[k] for k in dTypes.keys() & columnArray}
    # kLister is a dictionary of keys to be passed to executeSQL_UPDATE
    kLister = {k: kList[k] for k in kList.keys() & columnArray}
    aTypesr = {k: aTypes[k] for k in aTypes.keys() & columnArray}
    aNamesr = {k: aNames[k] for k in aNames.keys() & columnArray}