def output_to_excel_pivot(dataobj):
    wb = openworkbook(dataobj.sourcefile)
    datasheet = dataobj.datasheet
    ws_pvt = wb[dataobj.pivotsheet]
    ws_data = wb[datasheet]
    #clear data sheet and put in new data
    wb.remove(ws_data)
    wb.create_sheet(datasheet)
    ws_data = wb[datasheet]
    #header = ['id']
    #header.extend(dataobj.data.columns)
    #appending the raw column labels failed, most likely because the labels are
    #pandas/numpy objects openpyxl cannot serialize; the old no-op
    #replace('i', 'i') trick worked by coercing each label to a plain Python
    #str, which str() now does explicitly
    header = [str(w) for w in dataobj.data.columns]
    ws_data.append(header)
    for r in dataframe_to_rows(dataobj.data, index=False, header=False):
        ws_data.append(r)
    #update pivot data area
    pivot = ws_pvt._pivots[0]
    pivot.cache.cacheSource.worksheetSource.ref = (
        f'A1:{get_column_letter(len(dataobj.data.columns))}'
        f'{len(dataobj.data.index) + 1}')
    pivot.cache.refreshOnLoad = True  #openpyxl attribute is refreshOnLoad, capital L
    try:
        wb.save(filename=dataobj.outputfile)
    except Exception:
        print('excel file in use. No output file made')
        logging.warning(f'file in use {dataobj.outputfile}')

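#Usage sketch (illustrative only; the file names are hypothetical): drive
#output_to_excel_pivot with a PivotOutput object, the same way generatefiles()
#does below. The source workbook must already hold a pivot table on the pivot
#sheet that reads from the data sheet.
def _demo_output_to_excel_pivot():
    demo = PivotOutput()
    demo.sourcefile = r'C:\tmp\pivottemplate.xlsx'  #template with existing pivot
    demo.outputfile = r'C:\tmp\evaldata_demo.xlsx'
    demo.datasheet = 'data'
    demo.pivotsheet = 'pvt_data'
    demo.data = pd.DataFrame({'PA': ['PGE', 'SCE'], 'kWh': [1.0, 2.0]})
    output_to_excel_pivot(demo)
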
def get_df_from_driver(filepath: str, sheet: str, querystr: str = None):
    """
    Get a dataframe of a sheet from the driver spreadsheet
    Pass in:
        filepath: Path to the driver file
        sheet: Sheet with the list
        querystr: Optional query string to limit the list, e.g. 'active == "y"'
    Assumes the table starts on the first row; if not, pass a header row to
    convertwstodf instead.
    Returns a dataframe, or False if the workbook cannot be read
    """
    filelist = None
    try:
        sfsession = ShareFileSession(SHAREFILE_OPTIONS)
        readlist_item = sfsession.get_io_version(filepath)
        control_wb = wf.openworkbook(readlist_item.io_data)
        if not control_wb:
            logging.warning('cannot open read template file list workbook')
            return False
        ws = control_wb[sheet]
        filelist = wf.convertwstodf(ws)
        if querystr:
            filelist = filelist.query(querystr)
        control_wb.close()
    except Exception as e:
        msg = f'Problem in get_df_from_driver: {e}'
        logging.critical(msg)
        print(msg)
        return False
    readlist_item = None
    sfsession = None
    return filelist

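#Usage sketch: pull the active rows from a driver workbook's sheet. The path
#here is hypothetical; querystr uses pandas DataFrame.query syntax.
def _demo_get_df_from_driver():
    df = get_df_from_driver('/Shared/drivers/control.xlsx', 'spec',
                            'active == "y"')
    if df is False:
        print('driver sheet could not be loaded')
    else:
        print(df.head())
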
def generatefiles(session):
    #get field lists
    xreffile = params.D0_FIELD_CROSS_REF_FILE
    wb = openworkbook(xreffile)
    ws_map = wb['RptOutput']
    #convert to dataframe
    df_map = convertwstodf(ws_map, 1)
    ordercol = 'FieldOrder'
    claimcol = 'claimfields'
    clmfullcol = 'claimfull'
    dbrvwcol = 'dbreviewfields'
    #dbextcol = 'dbrvwextra'
    evalcol = 'evalfields'
    atrcol = 'atrfields'
    stepcol = 'stepeqn'
    rawcol = 'rawextra'
    #get sample frame
    df_frame = pd.read_csv(params.D0_DATA_PATH + '\\cust_trkg_data_2017.csv')
    #df_frame = df_frame.rename(columns={'Unamed: 0': 'excelcounter'})
    #df_frame = df_frame.set_index('ClaimID')
    cols = ['ClaimID', 'SBW.ProjID']
    df_frame_short = df_frame[cols]
    print(f'short type is {type(df_frame_short)}')
    #get claim data
    df_claim_full = pd.read_csv(params.D0_DATA_FILE, low_memory=False)
    df_claim_full = df_claim_full.merge(df_frame_short, on='ClaimID')
    # Exclude replaced measurements
    df_claim = df_claim_full[df_claim_full.Replaced != 'Yes']
    # Pick only sampled rows
    df_claim = df_claim[df_claim.sampled == 'Y']
    #limit to claim fields
    df_map_claimfields = df_map[df_map[claimcol].notnull()]
    df_map_claimfields = df_map_claimfields[[ordercol, claimcol]]
    df_claim = df_claim[df_map_claimfields[claimcol].tolist()]
    claimdict = df_map_claimfields.set_index(claimcol)[ordercol].to_dict()
    df_claim = df_claim.rename(columns=claimdict)
    #print(f'claim shape is {df_claim.shape} with cols {df_claim.columns}')
    #get ATR claim data
    df_atr = pd.read_csv(params.D0_LOCAL_ATR_OUTPUT_FILE, low_memory=False)
    df_map_atrfields = df_map[df_map[atrcol].notnull()]
    df_map_atrfields = df_map_atrfields[[ordercol, atrcol]]
    df_atr = df_atr[df_map_atrfields[atrcol].tolist()]
    atrdict = df_map_atrfields.set_index(atrcol)[ordercol].to_dict()
    df_atr = df_atr.rename(columns=atrdict)
    #get eval data
    measures_without_sample_id = pd.read_sql(
        session.query(Measure).statement, session.bind)
    #drop project fields
    smplfields = [
        'RvwInstallDate', 'RvwAppVsInstallDate', 'RvwPaidIncentive', 'RvwPermit'
    ]
    measures_without_sample_id.drop(smplfields, axis=1, inplace=True)
    samples = pd.read_sql(session.query(Sample).statement, session.bind)
    measures1 = measures_without_sample_id.merge(samples, on='SBW_ProjID')
    df_map_evalfields = df_map[df_map[evalcol].notnull()]
    df_map_evalfields = df_map_evalfields[[ordercol, evalcol]]
    df_dups = df_map_evalfields[df_map_evalfields.duplicated(evalcol)]
    print('dup is {}'.format(df_dups))
    df_map_evalfields.drop(df_dups[ordercol].tolist(), axis=0, inplace=True)
    evaldict = df_map_evalfields.set_index(evalcol)[ordercol].to_dict()
    measures = measures1[df_map_evalfields[evalcol].tolist()]
    measures = measures.rename(columns=evaldict)
    measures = measures.assign(SampledProject=1)
    if len(df_dups.index) > 0:
        #only works if there are not multiples of the same dup
        #add the dup fields
        evaldict = df_dups.set_index(evalcol)[ordercol].to_dict()
        tmp = measures1.rename(columns=evaldict)
        measures = measures.join(tmp[df_dups[ordercol].tolist()])
        del tmp
    df_eval = measures.merge(df_claim, on='ClaimID')
    df_eval = df_eval.merge(df_atr, on='ClaimID', suffixes=('eval', ''))
    print(f'df_eval shape with claim and atr is {df_eval.shape}')
    #Get data from alldata
    df_raw_extra = pd.read_csv(params.D0_ALL_DATA_FILE, low_memory=False)
    df_raw_extra = remapdata(df_map, rawcol, ordercol, df_raw_extra)
    #shouldn't be any overlap, but just in case we want to keep a clean set
    #(see _demo_merge_suffixes below)
    df_eval = df_eval.merge(df_raw_extra, on='ClaimID', suffixes=('_old', ''))
    #get db review data
    df_dbreview = pd.read_csv(params.D0_DATABASE_REVIEW_FILE)
    #df_dbreview_ext = pd.read_csv(params.D0_DATABASE_REVIEW_EXTENDED_FILE)
    #drop passthru records
    df_dbreview = df_dbreview.query(
        'EvalStdReportGroup == "2017_Savings_Review"')
    df_map_dbrvwfields = df_map[df_map[dbrvwcol].notnull()]
    df_map_dbrvwfields = df_map_dbrvwfields[[ordercol, dbrvwcol]]
    df_dbreview = df_dbreview[df_map_dbrvwfields[dbrvwcol].tolist()]
    dbrwvdict = df_map_dbrvwfields.set_index(dbrvwcol)[ordercol].to_dict()
    df_dbreview = df_dbreview.rename(columns=dbrwvdict)
    '''
    #add extra fields
    df_map_dbextfields = df_map[df_map[dbextcol].notnull()]
    df_map_dbextfields = df_map_dbextfields[[ordercol, dbextcol]]
    df_dbreview_ext = df_dbreview_ext[df_map_dbextfields[dbextcol].tolist()]
    dbrwvdict = df_map_dbextfields.set_index(dbextcol)[ordercol].to_dict()
    df_dbreview_ext = df_dbreview_ext.rename(columns=dbrwvdict)
    df_dbreview = df_dbreview.merge(df_dbreview_ext, on='ClaimID')
    '''
    #add claimfull
    df_map_clmfullfields = df_map[df_map[clmfullcol].notnull()]
    df_map_clmfullfields = df_map_clmfullfields[[ordercol, clmfullcol]]
    df_claim_ext = df_claim_full[df_map_clmfullfields[clmfullcol].tolist()]
    cfulldict = df_map_clmfullfields.set_index(clmfullcol)[ordercol].to_dict()
    df_claim_ext = df_claim_ext.rename(columns=cfulldict)
    df_dbreview = df_dbreview.merge(df_claim_ext, on='ClaimID')
    #add source flag
    df_dbreview = df_dbreview.assign(ClaimsDatabase=1)
    #append would stack dbreview rows below the sample rows: total rows = dbreview + sample
    #df_evalallrows = df_eval.append(df_dbreview)
    #the merge below creates extra fields where they overlap, matched on
    #ClaimID: total rows = dbreview rows
    df_eval = df_eval.merge(df_dbreview,
                            on='ClaimID',
                            how='outer',
                            suffixes=('', '_cdr'))
    #now add in the atr fields for the full set
    df_eval = df_eval.merge(df_atr,
                            on='ClaimID',
                            how='left',
                            suffixes=('', '_atr'))
    #Add in the steps for table 5
    df_steps = createSteps(df_eval)
    df_steps = df_steps.reset_index()
    df_steps = remapdata(df_map, stepcol, ordercol, df_steps)
    #shouldn't be any overlap, but just in case we want to keep a clean set
    df_eval = df_eval.merge(df_steps, on='ClaimID', suffixes=('_old', ''))
    #just a diagnostic section, I think: count records set to zero as ineligible
    ws_map = wb['Eligibility']
    #convert to dataframe
    df_map_elig = convertwstodf(ws_map, 1)
    anycol = 'noteligfieldsany'
    allcol = 'noteligfieldsall'
    df_map_fields = df_map_elig[df_map_elig[anycol].notnull()]
    anyfields = df_map_fields[anycol].tolist()
    df_map_fields = df_map_elig[df_map_elig[allcol].notnull()]
    allfields = df_map_fields[allcol].tolist()
    tmp = ((df_eval[anyfields] == 'No').any(axis=1)
           | (df_eval[allfields] == 'No').all(axis=1))
    print(f'ineligible list length {tmp.sum()}')
    t6cnts = (df_eval[anyfields] == 'No').sum()
    csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\settozerocnts.csv'
    t6cnts.to_csv(csvfile)
    #print(f't6? {t6cnts}')
    #end of diagnostic section
    print(f'df_eval shape after dbreview merge is {df_eval.shape}')
    #print(f'df_evalallrows shape after dbreview append is {df_evalallrows.shape}')
    #flag for changed NTG and EUL IDs
    #df_eval = df_eval.assign(NTGIDChanged=df_eval['cdrNTG_ID'] != df_eval['EvalNTG_ID'])
    df_eval = df_eval.assign(EvalEUL_ID_trim=df_eval['EvalEUL_ID'].str.replace(
        '_Any', '').str.replace('_Gro', ''))
    #df_eval = df_eval.assign(EULIDChanged=df_eval['cdrEUL_ID'] != df_eval['EvalEUL_ID_trim'])
    df_eval = df_eval.assign(MeasDescChanged=df_eval['EvalMeasDescription'] !=
                             df_eval['MeasDescription'])
    #new fields to indicate savings changed
    df_eval = df_eval.assign(
        kWhChanged=df_eval['EvalExPostLifecycleNetkWh_atr'] !=
        df_eval['ExAnteLifecycleNetkWh'])
    df_eval = df_eval.assign(
        kWhPctChange=((df_eval['EvalExPostLifecycleNetkWh_atr'] -
                       df_eval['ExAnteLifecycleNetkWh']) /
                      df_eval['ExAnteLifecycleNetkWh']))
    df_eval = df_eval.assign(
        thmChanged=df_eval['EvalExPostLifecycleNetTherm_atr'] !=
        df_eval['ExAnteLifecycleNetTherm'])
    df_eval = df_eval.assign(
        thmPctChange=((df_eval['EvalExPostLifecycleNetTherm_atr'] -
                       df_eval['ExAnteLifecycleNetTherm']) /
                      df_eval['ExAnteLifecycleNetTherm']))
    #set final column order (the selection is currently commented out, so
    #df_final passes through unchanged)
    df_map_fields = df_map[df_map[ordercol].notnull()]
    #fieldorder = df_map_fields[ordercol].tolist()
    #df_final = df_eval[fieldorder]
    df_final = df_eval
    try:
        csvfile = params.D0_REPORT_DATA_FILE
        df_final.to_csv(csvfile, index=False)
        logging.info(f'wrote csv for {csvfile}')
    except Exception:
        logging.info('file in use, writing to backup')
        df_final.to_csv(params.SAMPLED_SITE_REVIEW_PATH + '\\evaldata2.csv',
                        index=False)
    #generate xl file
    exportdata = PivotOutput()
    exportdata.sourcefile = params.SAMPLED_SITE_REVIEW_PATH + '\\pivottemplate.xlsx'
    exportdata.outputfile = params.SAMPLED_SITE_REVIEW_PATH + '\\evaldata.xlsx'
    exportdata.datasheet = 'data'
    exportdata.pivotsheet = 'pvt_data'
    exportdata.data = df_final
    output_to_excel_pivot(exportdata)
    print('data exported')
    logging.info('Done generating report data files')

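#Note on the suffixes=('_old', '') pattern used repeatedly in generatefiles():
#pandas applies the first suffix to overlapping columns from the left frame and
#the second to the right frame, so the incoming data keeps the clean column
#name. Minimal toy demonstration:
def _demo_merge_suffixes():
    left = pd.DataFrame({'ClaimID': [1, 2], 'val': [10, 20]})
    right = pd.DataFrame({'ClaimID': [1, 2], 'val': [11, 21]})
    merged = left.merge(right, on='ClaimID', suffixes=('_old', ''))
    print(merged.columns.tolist())  #['ClaimID', 'val_old', 'val']
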
def gettablespecs(sfsession, filepath, statusfilter='Test'):
    """
    Load the various table specs from the passed file as defined on the Captions sheet
    Inputs - Filepath or workbook, statusfilter to control which tables
    Returns a two-element list of dicts, {'tables': tablespecs} and
    {'figures': figurespecs}
    """
    #open workbook
    #assumes there is a Captions sheet which will drive it
    sheet = 'Captions'
    srcsheet = 'SourceDefs'
    #TODO bring in def as a dataframe instead of messing with it as a sheet.
    if isinstance(filepath, str):
        file_item = sfsession.get_io_version(filepath)
        if file_item:
            filepath = file_item.io_data
    try:
        wb = openworkbook(filepath, values=True)
    except Exception:
        wb = filepath  #already a workbook object
    if not wb:
        print('problem loading workbook. Quitting')
        return False
    try:
        ws = wb[sheet]
    except KeyError:
        print('Captions sheet missing')
        return False
    try:
        #ws_src = None
        ws_src = wb[srcsheet]
    except KeyError:
        print('Source defs sheet missing')
        return False
    #cycle through the captions listed if type is table
    #for row in ws.iter_rows(row_offset=1):
    #get critical col numbers
    for i in range(1, ws.max_column + 1):
        if ws.cell(1, i).value is None:
            continue
        #print(f'text is {ws.cell(1, i)}')
        if ws.cell(1, i).value.lower() == 'type':
            typecol = i
        elif ws.cell(1, i).value.lower() == 'text':  #for old version
            captioncol = i
        elif ws.cell(1, i).value.lower() == 'caption':  #for new version
            captioncol = i
        elif ws.cell(1, i).value.lower() == 'sheet':
            sheetcol = i
        elif ws.cell(1, i).value.lower() == 'status':
            statuscol = i
        elif ws.cell(1, i).value.lower() == 'source':
            sourcecol = i
        elif ws.cell(1, i).value.lower() == 'destination':
            destcol = i
        elif ws.cell(1, i).value.lower() == 'style':
            stylecol = i
        elif ws.cell(1, i).value.lower() == 'fit':
            fitcol = i
    parameters = []
    #not using this section after D0 (I don't think)
    '''
    if 'sheetcol' in locals():
        #to pull filter column data. PA in the D0 example
        for i in range(sheetcol + 2, ws.max_column + 1):  #to skip extra column
            param = Pfield(i, ws.cell(1, i).value)
            #parameters.append({pcol: i, field: ws.cell(1, i)})
            parameters.append(param)
    '''
    tablespecs = {}
    figurespecs = {}
    for i in range(1, ws.max_row + 1):
        #print(f'processing row {i} as sheet {ws.cell(i, sheetcol).value}')
        #test below breaks on D0 so no longer compatible :(
        if (ws.cell(i, destcol).value is not None
                and ws.cell(i, destcol).value.lower() == 'report workbook'
                and ws.cell(i, statuscol).value is not None
                and statusfilter.lower() in ws.cell(i, statuscol).value.lower()):
            ws_object = wb[ws.cell(i, sheetcol).value]
            tblspec = TableSpec()
            tblspec.getdataspec(ws_object)
            tblspec.source = SourceDef().get_details(
                sfsession,
                wb=ws.parent,
                sheet='SourceDefs',
                srccol='sourcename',
                srcname='sourcedef',
                loccol='location',
                shtcol='sheet',
                tablename=ws.cell(i, sourcecol).value)
            tblspec.name = ws.cell(i, captioncol).value
            tblspec.get_unique_fields()
            tablespecs[tblspec.name] = tblspec
        elif (ws.cell(i, destcol).value is not None
              and ws.cell(i, typecol).value.lower() == 'table'
              and ws.cell(i, statuscol).value is not None
              and statusfilter.lower() in ws.cell(i, statuscol).value.lower()):
            lkpsheet = ws.cell(i, sheetcol).value
            if lkpsheet not in wb.sheetnames:
                print(f'yo! sheet missing: {lkpsheet}')
                continue
            ws_object = wb[ws.cell(i, sheetcol).value]
            #ws_object = wb[ws.cell(i, captioncol).hyperlink.location.split('!')[0].replace("'", '')]
            sfilter = ''
            for item in parameters:
                if ws.cell(i, item.column).value:
                    if ws.cell(i, item.column).value.isnumeric():
                        sfilter += f'{item.field} == {ws.cell(i, item.column).value} and '
                    else:
                        sfilter += f'{item.field} == \'{ws.cell(i, item.column).value}\' and '
            if sfilter == '':
                sfilter = None
            elif sfilter[-4:] == 'and ':
                sfilter = sfilter[:-5]
            tblspec = createtablespec(ws_object, sfilter)
            if not tblspec:
                print(f'problem getting tablespec for {ws_object.title}')
                continue
            tblspec.source = SourceDef().get_details(
                sfsession,
                wb=ws.parent,
                sheet='SourceDefs',
                srccol='sourcename',
                srcname='sourcedef',
                loccol='location',
                shtcol='sheet',
                tablename=ws.cell(i, sourcecol).value)
            tblspec.name = ws.cell(i, captioncol).value
            tblspec.style = ws.cell(i, stylecol).value
            tblspec.autofit = ws.cell(i, fitcol).value
            tblspec.get_unique_fields()
            tablespecs[tblspec.name] = tblspec
        elif (ws.cell(i, destcol).value is not None
              and ws.cell(i, typecol).value.lower() == 'figure'):
            ws_object = wb[ws.cell(i, captioncol).hyperlink.location.split(
                '!')[0].replace("'", '')]
            #this call currently doesn't do anything because the procedure is just a pass
            figurespecs[ws.cell(i, captioncol).value] = createfigurespec(ws_object)
    #possibly only temporary processing of figures
    #TODO change to io version
    df_captions = pd.read_excel(filepath, sheet)
    df_figures = df_captions[
        df_captions['Type'].str.contains('Plot')
        & df_captions['Status'].str.contains(statusfilter)].groupby('Source')
    for source, data in df_figures:
        figurespec = TableSpec()
        figurespec.captions = [x for x in data['Caption']]
        figurespec.source = SourceDef().get_details(
            sfsession,
            wb=ws.parent,
            sheet='SourceDefs',
            srccol='sourcename',
            srcname='sourcedef',
            loccol='location',
            shtcol='sheet',
            tablename=source)
        figurespecs[source] = figurespec
    specs = []
    specs.append({'tables': tablespecs})
    specs.append({'figures': figurespecs})
    return specs

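#Usage sketch: gettablespecs returns a two-element list of dicts, unpacked
#positionally. The session setup and path here are hypothetical.
def _demo_gettablespecs():
    sfsession = ShareFileSession(SHAREFILE_OPTIONS)
    specs = gettablespecs(sfsession, '/Shared/report/captions.xlsx',
                          statusfilter='Final')
    if not specs:
        return
    tablespecs = specs[0]['tables']
    figurespecs = specs[1]['figures']
    print(f'{len(tablespecs)} tables, {len(figurespecs)} figures')
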
def db_to_excel(filepath):
    """
    Move data from the db (or other source) to excel files as defined in the passed excel filename
    The passed workbook must have a sheet named spec and columns named
    active, srctype, tablename, srcindex, srcfields, destindex, destfile,
    sheet, headerrow, fields (plus srcsheet and srcheaderrow when srctype
    is 'excel')
    """
    sfsession = ShareFileSession(SHAREFILE_OPTIONS)
    #open workbook
    control_item = sfsession.get_io_version(filepath)
    if not control_item:
        logging.warning(f'driver file {filepath} not found for db_to_excel')
        return False
    control_wb = wf.openworkbook(control_item.io_data)
    ws = control_wb['spec']
    filelist = wf.convertwstodf(ws)
    filelist = filelist.query('active == "y"')
    control_wb.close()
    control_item = None
    #gather needed parts
    #TODO sort list so all changes to a workbook can be done and that workbook
    #saved once, rather than opening and closing for each write
    for filerow in filelist.itertuples():
        excelpath = filerow.destfile
        localname = os.path.basename(excelpath)
        headerrow = filerow.headerrow
        sheet = filerow.sheet
        fields = filerow.fields
        table = filerow.tablename
        srcindex = filerow.srcindex
        dstindex = filerow.destindex
        wkb_item = sfsession.get_io_version(excelpath)
        hasvba = '.xlsm' in excelpath
        srcfields = filerow.srcfields
        srctype = filerow.srctype
        if srctype == 'db':
            #get data from db
            data = pd.read_sql(table, engine)
        elif srctype == 'csv':
            src_item = sfsession.get_io_version(table)
            if not src_item:
                logging.warning(f'file {table} not found for db_to_excel')
                continue
            src_item.io_data.seek(0)
            data = pd.read_csv(src_item.io_data)
        elif srctype == 'excel':
            src_item = sfsession.get_io_version(table)
            if not src_item:
                logging.warning(f'file {table} not found for db_to_excel')
                continue
            srcsheet = filerow.srcsheet
            srcrow = filerow.srcheaderrow
            #pandas read_excel has no start_row argument; header (0-based)
            #is the supported way to say where the header row sits
            data = pd.read_excel(src_item.io_data,
                                 sheet_name=srcsheet,
                                 header=srcrow - 1)
        srcfieldlist = srcfields.split(',')
        srcfieldlist.append(srcindex)
        #copy so renaming the columns doesn't warn about writing to a slice
        data_fields = data[srcfieldlist].copy()
        dstfieldlist = fields.split(',')
        dstfieldlist.append(dstindex)
        #remap src field names to dst field names
        data_fields.columns = dstfieldlist
        datalist = data_fields.to_dict('records')
        #this one is used to figure out where data should go, but since it
        #loads values it can't be saved
        ws_vals = getSampleControlFile(
            filepath=wkb_item.io_data,
            wks=sheet,
            headerrow=headerrow,  #not used but passing it anyway
            asdf=False,
            usevalues=True)
        #this is the one the values actually get written to
        ws_out = getSampleControlFile(
            filepath=wkb_item.io_data,
            wks=sheet,
            headerrow=headerrow,  #not used but passing it anyway
            asdf=False,
            usevalues=False,
            usevba=hasvba)
        print(f'starting to write to excel {fields} {str(datetime.now())}')
        ws_results = writetoexcel(ws_vals=ws_vals,
                                  ws_out=ws_out,
                                  header_row=headerrow,
                                  datalist=datalist,
                                  key=dstindex)
        print(f'done writing to excel {str(datetime.now())}')
        if not ws_results:
            print('bad results')
        else:
            tracker_io = BytesIO()
            ws_results.parent.save(tracker_io)
            #upload to sf
            wkb_folderID = wkb_item.data['Parent']['Id']
            sfsession.upload_file(wkb_folderID, localname, tracker_io)
            print(f'done uploading to sharefile {str(datetime.now())}')

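#Sketch of one 'spec' row that db_to_excel() consumes (values are hypothetical;
#the column names match the filerow attributes read above):
#  active: y
#  srctype: csv                       (one of db / csv / excel)
#  tablename: /Shared/data/atr.csv    (db table name, or source file path)
#  srcindex: ClaimId                  srcfields: kwh,therms
#  destindex: ClaimId                 fields: EvalkWh,EvalTherms
#  destfile: /Shared/trackers/tracker.xlsm   sheet: data   headerrow: 1
#  srcsheet / srcheaderrow: only required when srctype == 'excel'
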
def createSteps(data=None):
    if data is None:
        df_data = pd.read_csv(params.D0_REPORT_DATA_FILE)
    else:
        df_data = data
    xreffile = params.D0_FIELD_CROSS_REF_FILE
    wb = openworkbook(xreffile)
    ws_map = wb['stepeqn']
    #convert to dataframe
    df_map = convertwstodf(ws_map, 1)
    fieldcol = 'fieldname'
    eqncol = ['eqn_true', 'eqn_false', 'eqn_condition']
    eqnheaders = [fieldcol]
    eqnheaders.extend(eqncol)
    #some constants for the calculated stuff
    operators = '*/()+-><!==AND&OR|.'
    stringops = 'ANDOR&|'
    dfname = 'df_all_fields'
    df_all_fields = df_data.set_index('ClaimID')
    #pull in conditional
    df_map_eqnfields = df_map[df_map[eqncol].notnull().any(axis=1)]
    df_map_eqnfields = df_map_eqnfields[eqnheaders]
    print(f'all fields column count to start is {len(df_all_fields.columns)}')
    for _, row in df_map_eqnfields.iterrows():
        parts_true = row[eqncol[0]].split()
        try:
            parts_false = row[eqncol[1]].split()
            parts_cond = row[eqncol[2]].split()
            eqnonly = False
        except Exception:
            eqnonly = True
        nojoin = False
        for p in parts_true:
            if len(parts_true) == 1:
                eqn_true = p
                nojoin = True
            elif (p not in operators and '==' not in p and '.' not in p
                  and not is_number(p)):
                parts_true[parts_true.index(p)] = "{}['{}']".format(dfname, p)
        if not nojoin:
            eqn_true = ''.join(parts_true)
        nojoin = False
        if not eqnonly:
            for p in parts_false:
                if len(parts_false) == 1:
                    eqn_false = p
                    nojoin = True
                elif p not in operators and '==' not in p and not is_number(p):
                    parts_false[parts_false.index(p)] = "{}['{}']".format(
                        dfname, p)
            if not nojoin:
                eqn_false = ''.join(parts_false)
            nojoin = False
            for p in parts_cond:
                if len(parts_cond) == 1:
                    eqn_cond = p
                    nojoin = True
                elif (p not in operators and 'No' not in p and '==' not in p
                      and '.' not in p and not is_number(p)):
                    #maybe change to startswith in operators if we can use a list
                    parts_cond[parts_cond.index(p)] = "{}['{}']".format(
                        dfname, p)
                elif p in operators:
                    parts_cond[parts_cond.index(p)] = ' ' + p + ' '
            if not nojoin:
                eqn_cond = ''.join(parts_cond)
        #print('eqn true:{}'.format(eqn_true))
        #print('eqn false:{}'.format(eqn_false))
        #print('eqn cond:{}'.format(eqn_cond))
        #pattern: df['d'] = df['b'].where(df['b'] < 0, df['c'])
        if eqnonly:
            myargs = eqn_true
        else:
            myargs = eqn_true + '.where(' + eqn_cond + ', ' + eqn_false + ')'
        #dftmp is an alias of df_all_fields, not a copy, which is why the
        #assignment below also adds the column to df_all_fields
        dftmp = df_all_fields
        #dftmp.set_index('ClaimID', inplace=True)
        #print(f'all fields column count before eval is {len(df_all_fields.columns)}')
        print(f'processing {row[fieldcol]}')  #', my args is {myargs}')
        dftmp[row[fieldcol]] = eval(myargs)
        #dftmp = dftmp[row[fieldcol]].reset_index().set_index('ClaimID')
        #print(f'all fields column count after eval is {len(df_all_fields.columns)}')
        #df_all_fields = df_all_fields.join(dftmp)
        #print(f'all fields column count after join is {len(df_all_fields.columns)}')
        #df_all_fields = df_all_fields.merge(dftmp, on='ClaimID')
    print(f'all field shape is {df_all_fields.shape}')
    answerfields = [
        'ExAnteLifecycleNetkW', 'ExAnteLifecycleNetkWh',
        'ExAnteLifecycleNetTherm', 'cdrdatekw', 'cdrdatekwh', 'cdrdatethm',
        'cdrntgeligkw', 'cdrntgeligkwh', 'cdrntgeligthm', 'cdrulntgeligkw',
        'cdrulntgeligkwh', 'cdrulntgeligthm'
    ]
    countfields = [
        'cdrdateineligibleflagkw', 'cdrdateineligibleflagkwh',
        'cdrdateineligibleflagthm', 'cdrdatentgineligibleflagkw',
        'cdrdatentgineligibleflagkwh', 'cdrdatentgineligibleflagthm',
        'cdrdatentgulineligibleflagkw', 'cdrdatentgulineligibleflagkwh',
        'cdrdatentgulineligibleflagthm'
    ]
    #copy rather than alias so adding 'PA' doesn't mutate answerfields
    answerfieldsPA = answerfields + ['PA']
    df_all_fieldsShort = df_all_fields[answerfieldsPA]
    summarytable = df_all_fieldsShort.groupby(['PA']).sum()
    csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\steps_dbr_SumsbyPA.csv'
    summarytable.to_csv(csvfile)
    answerfieldsPA = countfields + ['PA']
    df_all_fieldsShort = df_all_fields[answerfieldsPA]
    summarytable = df_all_fieldsShort.groupby(['PA']).sum()
    csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\steps_dbr_CountsbyPA.csv'
    summarytable.to_csv(csvfile)
    '''
    pd.options.display.float_format = '{:20,.0f}'.format
    print(f'step 1 summary {summarytable}')
    df_output = reshapestepssummary(summarytable)
    csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\steps_dbr_SumsAll.csv'
    df_output.to_csv(csvfile)
    summarytable = df_all_fields[countfields].agg('sum')
    print(f'step 1 count summary {summarytable}')
    df_output = reshapestepssummary(summarytable)
    csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\steps_dbr_CountsAll.csv'
    df_output.to_csv(csvfile)
    '''
    #sample only version
    answerfields = [
        'ExAnteLifecycleNetkW', 'ExAnteLifecycleNetkWh',
        'ExAnteLifecycleNetTherm', 'evaleligkw', 'evaleligkwh', 'evaleligthm',
        'evalsvgsEligkw', 'evalsvgsEligkwh', 'evalsvgsEligthm',
        'evalULsvgeligkw', 'evalULsvgeligkwh', 'evalULsvgeligthm',
        'evalNTGULsvgeligkw', 'evalNTGULsvgeligkwh', 'evalNTGULsvgeligthm'
    ]
    countfields = [
        'evalineligibleflagkw', 'evalineligibleflagkwh',
        'evalineligibleflagthm', 'evalsvgschangeflagkw',
        'evalsvgschangeflagkwh', 'evalsvgschangeflagthm',
        'evalULchangeflagkw', 'evalULchangeflagkwh', 'evalULchangeflagthm',
        'evalNTGchangeflagkw', 'evalNTGchangeflagkwh', 'evalNTGchangeflagthm'
    ]
    summarytable = df_all_fields.query(
        'SampledProject == 1')[answerfields].agg('sum')
    #print(f'step 2x summary {summarytable}')
    summarytable = df_all_fields.query(
        'SampledProject == 1')[countfields].agg('sum')
    #print(f'step 2x count summary {summarytable}')
    #ATR version
    answerfields = [
        'ExAnteLifecycleNetkW', 'ExAnteLifecycleNetkWh',
        'ExAnteLifecycleNetTherm', 'atr_eligkw', 'atr_eligkwh', 'atr_eligthm',
        'atr_svgsEligkw', 'atr_svgsEligkwh', 'atr_svgsEligthm',
        'atr_NTGsvgeligkw', 'atr_NTGsvgeligkwh', 'atr_NTGsvgeligthm',
        'atr_NTGULsvgeligkw', 'atr_NTGULsvgeligkwh', 'atr_NTGULsvgeligthm'
    ]
    countfields = [
        'atr_ineligibleflagkw', 'atr_ineligibleflagkwh',
        'atr_ineligibleflagthm', 'atr_svgschangeflagkw',
        'atr_svgschangeflagkwh', 'atr_svgschangeflagthm',
        'atr_ULchangeflagkw', 'atr_ULchangeflagkwh', 'atr_ULchangeflagthm',
        'atr_NTGchangeflagkw', 'atr_NTGchangeflagkwh', 'atr_NTGchangeflagthm'
    ]
    answerfieldsPA = answerfields + ['PA']
    df_all_fieldsShort = df_all_fields[answerfieldsPA]
    summarytable = df_all_fieldsShort.groupby(['PA']).sum()
    csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\steps_DS_SumsbyPA.csv'
    summarytable.to_csv(csvfile)
    answerfieldsPA = countfields + ['PA']
    df_all_fieldsShort = df_all_fields[answerfieldsPA]
    summarytable = df_all_fieldsShort.groupby(['PA']).sum()
    csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\steps_DS_CountsbyPA.csv'
    summarytable.to_csv(csvfile)
    csvfile = params.D0_REPORT_STEPTABLE_DATA_FILE
    try:
        df_all_fields.to_csv(csvfile, index=True)
    except Exception:
        print('drat, step file in use. no file created.')
    if data is not None:
        return df_all_fields

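#Sketch of the eval()/where() mechanism createSteps() builds from the stepeqn
#sheet: tokens that are not operators or literals become df_all_fields['col']
#references, and the assembled string evaluates to
#eqn_true.where(eqn_condition, eqn_false). Toy equivalent with made-up columns:
def _demo_step_eqn():
    df_all_fields = pd.DataFrame({'kwh': [5.0, -3.0], 'fallback': [0.0, 0.0]})
    #sheet row: eqn_true='kwh', eqn_condition='kwh > 0', eqn_false='fallback'
    myargs = ("df_all_fields['kwh'].where("
              "df_all_fields['kwh'] > 0, df_all_fields['fallback'])")
    df_all_fields['step1'] = eval(myargs)
    print(df_all_fields)  #step1 is 5.0 where kwh > 0, else 0.0
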
def createEvalResults(session, WriteFinalFile=False):
    #create sample file
    data = pd.read_csv(params.D0_DATA_FILE)
    # Exclude replaced measurements
    not_replaced = data[data.Replaced != 'Yes']
    # Pick only sampled rows
    sample_list = not_replaced[not_replaced.sampled == 'Y']
    # Group by SampleID and SBW_ProjID
    sample_groups = sample_list.groupby(
        ['SampleID', 'SBW_ProjID']).size().reset_index().rename(
            columns={0: 'msrcount'})
    sample_groups = sample_groups.astype({'SampleID': int})
    #read data from database
    df_control = getSampleControlFile()
    df_control = df_control[df_control['ProjectStatus'] == 'Complete']
    #join together so we have complete status and project ids
    mylist = df_control.set_index('SampleID').join(
        sample_groups.set_index('SampleID'), how='left', lsuffix='_msr')
    #print('MYLIST TYPE IS {}'.format(mylist))
    #print('list cols:{}'.format(mylist.columns))
    msrheaders = ['SBW_ProjID', 'ProjectStatus', 'msrcount']
    mylist = mylist[msrheaders]
    print('complete list shape: {}'.format(mylist.shape))
    #print(mylist)
    #s.query(User).filter(User.name == 'Mariana').one()
    #msrs = session.query(Measure).all()
    #print('msr type is {}'.format(type(msrs)))
    #df_msrs = pd.DataFrame(session.query(Measure).all())
    df_msrs = pd.read_sql(session.query(Measure).statement, session.bind)
    #print('msr type: {}'.format(type(df_msrs)))
    #mylist.set_index('SBW_ProjID', inplace=True)
    #df_msrs.set_index('SBW_ProjID', inplace=True)
    df_complete_msrs = mylist.set_index('SBW_ProjID').join(
        df_msrs.set_index('SBW_ProjID'), how='left', lsuffix='_msr')
    #df_complete_msrs = mylist.join(df_msrs, how='left', lsuffix='_msr')
    #print('msr shape:{}'.format(df_msrs.shape))
    print('msr complete shape:{}'.format(df_complete_msrs.shape))
    #print(df_complete_msrs)
    #Make adjustments
    #read cross-reference file
    xreffile = params.D0_FIELD_CROSS_REF_FILE
    wb = openworkbook(xreffile)
    ws_map = wb['Eligibility']
    #convert to dataframe
    df_map = convertwstodf(ws_map, 1)
    anycol = 'noteligfieldsany'
    allcol = 'noteligfieldsall'
    df_map_fields = df_map[df_map[anycol].notnull()]
    anyfields = df_map_fields[anycol].tolist()
    df_map_fields = df_map[df_map[allcol].notnull()]
    allfields = df_map_fields[allcol].tolist()
    #Measures not eligible
    savingsfields = [
        'EvalBase1kWhSvgs', 'EvalBase1kWSvgs', 'EvalBase1ThermSvgs',
        'EvalBase2kWhSvgs', 'EvalBase2kWSvgs', 'EvalBase2ThermSvgs'
    ]
    df_complete_msrs.reset_index(inplace=True)
    df_complete_msrs['EvalIneligiblekw'] = False
    df_complete_msrs['EvalIneligiblekwh'] = False
    df_complete_msrs['EvalIneligiblethm'] = False
    for field in savingsfields:
        if 'kWSvgs' in field:
            engtype = 'kw'
        if 'kWhSvgs' in field:
            engtype = 'kwh'
        if 'ThermSvgs' in field:
            engtype = 'thm'
        #create Orig field to hold original value
        df_complete_msrs[field + '_Orig'] = df_complete_msrs[field]
        df_complete_msrs[field + '_ChangeReason'] = 'NA'
        df_complete_msrs[field] = df_complete_msrs[field].where(
            (df_complete_msrs[anyfields] != 'No').all(axis=1), 0)
        #df_complete_msrs[field] = df_complete_msrs[field].where((df_complete_msrs[anyfields] != 'No').all(1)
        #    & (df_complete_msrs[allfields] != 'No').any(1), 0)
        df_complete_msrs[field + '_ChangeReason'] = df_complete_msrs[
            field + '_ChangeReason'].where(
                (df_complete_msrs[anyfields] != 'No').all(axis=1), 'Ineligible')
        #df_complete_msrs[field + '_ChangeReason'] = df_complete_msrs[field + '_ChangeReason'].where((df_complete_msrs[anyfields] != 'No').all(1)
        #    & (df_complete_msrs[allfields] != 'No').any(1), 'Ineligible')
        df_complete_msrs['EvalIneligible' + engtype] = df_complete_msrs[
            'EvalIneligible' + engtype].where(
                (df_complete_msrs[anyfields] != 'No').all(axis=1), True)
        #df_complete_msrs['EvalIneligible' + engtype] = df_complete_msrs['EvalIneligible' + engtype].where((df_complete_msrs[anyfields] != 'No').all(1)
        #    & (df_complete_msrs[allfields] != 'No').any(1), True)
    #ntgr to zero
    ntgrfields = ['EvalNTG_kWH', 'EvalNTG_therms']  #, 'EvalNTGRTherm', 'EvalNTGRCost']
    df_complete_msrs['ProgInfluenceFlag'] = (
        df_complete_msrs[allfields] == 'No').all(axis=1)
    for field in ntgrfields:
        df_complete_msrs[field] = df_complete_msrs[field].where(
            (df_complete_msrs[allfields] != 'No').any(axis=1), 0)
    df_project = pd.read_sql(session.query(Sample).statement, session.bind)
    print('prj shape:{}'.format(df_project.shape))
    #produces cartesian join, ouch
    #df_join = pd.DataFrame(session.query(Measure, Sample).all())
    #print('join shape:{}'.format(df_join.shape))
    #add calculated/lookup fields
    #set column names
    ws_map = wb['mapping']
    #convert to dataframe
    df_map = convertwstodf(ws_map, 1)
    #print('map shape is {}'.format(df_map.shape))
    #print(df_map)
    #df_all_msrs[df_all_msrs[testfields_MeasAppType].notnull().any(1)
    atrfinalcol = 'ATRFieldlist'
    atrcol = 'InternalFields'
    claimcol = 'FD_SampleFieldName'
    wkbcol = 'workbookFieldName'
    dbcol = 'databasefieldname'
    srccol = 'atraccess'
    constcol = 'Constant'
    calccol = 'Calculation'
    calc2col = 'DependantCalc'
    rndcol = 'roundto'
    eqncol = ['eqn_true', 'eqn_false', 'eqn_condition']
    claimheaders = [atrcol, claimcol]
    #wkbheaders = [atrcol, wkbcol]
    dbheaders = [atrcol, dbcol]
    srcheaders = [atrcol, srccol]
    calcheaders = [atrcol, calccol]
    calc2headers = [atrcol, calc2col]
    constheaders = [atrcol, constcol]
    eqnheaders = [atrcol]
    eqnheaders.extend(eqncol)
    rndheaders = [atrfinalcol, rndcol]
    #Bring in Faith's claim fields
    df_map_claimfields = df_map[df_map[claimcol].notnull()]
    df_map_claimfields = df_map_claimfields[claimheaders]
    df_dups = df_map_claimfields[df_map_claimfields.duplicated(claimcol)]
    print('dup is {}'.format(df_dups))
    df_map_claimfields.drop(df_dups[atrcol].tolist(), axis=0, inplace=True)
    claimdict = df_map_claimfields.set_index(claimcol)[atrcol].to_dict()
    df_atr = sample_list[df_map_claimfields[claimcol].tolist()]
    df_atr = df_atr.rename(columns=claimdict)
    #if df_dups is not None:
    if len(df_dups.index) > 0:
        #only works if there are not multiples of the same dup
        #add the dup fields
        claimdict = df_dups.set_index(claimcol)[atrcol].to_dict()
        tmp = sample_list.rename(columns=claimdict)
        df_atr = df_atr.join(tmp[df_dups[atrcol].tolist()])
        del tmp
    #print('atr shape : {}, cols: {}'.format(df_atr.shape, df_atr.columns))
    #Add fields from db column
    df_map_dbfields = df_map[df_map[dbcol].notnull()]
    df_map_dbfields = df_map_dbfields[dbheaders]
    df_dups = df_map_dbfields[df_map_dbfields.duplicated(dbcol)]
    print('dup is {}'.format(df_dups))
    #works even if no dups
    df_map_dbfields.drop(df_dups[atrcol].tolist(), axis=0, inplace=True)
    dbdict = df_map_dbfields.set_index(dbcol)[atrcol].to_dict()
    df_eval_msrs = df_complete_msrs[df_map_dbfields[dbcol].tolist()]
    df_eval_msrs = df_eval_msrs.rename(columns=dbdict)
    #uses the lowercase ClaimId spelling because the fields have been renamed
    #to their ATR versions
    df_atr = df_atr.set_index('ClaimId').join(df_eval_msrs.set_index('ClaimId'))
    #df_atr = sample_list[df_map_dbfields[claimcol].tolist()]
    #df_atr = df_atr.rename(columns=claimdict)
    if len(df_dups.index) > 0:
        #only works if there are not multiples of the same dup
        #add the dup fields
        dbdictdup = df_dups.set_index(dbcol)[atrcol].to_dict()
        tmp = df_complete_msrs.rename(columns=dbdictdup)
        if df_atr.index.name != 'ClaimId':
            df_atr.set_index('ClaimId', inplace=True)
        #df_complete_msrs still uses the ClaimID spelling
        df_atr = df_atr.join(tmp.set_index('ClaimID')[df_dups[atrcol].tolist()])
        del tmp
    #print('atr shape : {}, cols: {}'.format(df_atr.shape, df_atr.columns))
    #print('atr shape with db fields : {}'.format(df_atr.shape))
    # Add in atraccess fields
    df_map_srcfields = df_map[df_map[srccol].notnull()]
    df_map_srcfields = df_map_srcfields[srcheaders]
    df_src = pd.read_csv(params.D0_ATR_SOURCE_FILE)
    df_src = df_src[df_map_srcfields[srccol].tolist()]
    df_atr = df_atr.join(df_src.set_index('ClaimId'))
    #print('atr shape with atr source fields : {}'.format(df_atr.shape))
    # At this point df_atr has all the fields in columns B:F from the cross
    # ref. Just need to add the calculated fields
    #pull in constants
    df_map_constants = df_map[df_map[constcol].notnull()]
    df_map_constants = df_map_constants[constheaders]
    dbdict = df_map_constants.set_index(atrcol)[constcol].to_dict()
    for i in dbdict:
        if isinstance(dbdict[i],
                      (int, float, complex)) or dbdict[i].lower() != 'null':
            df_atr[i] = dbdict[i]
        else:
            df_atr[i] = None
    #print('atr shape with constants added : {}'.format(df_atr.shape))
    #some constants for the calculated stuff
    operators = '*/()+-><!==AND&OR|.'
    stringops = 'ANDOR&|'
    dfname = 'df_all_fields'
    #pull in conditional
    df_map_eqnfields = df_map[df_map[eqncol].notnull().any(axis=1)]
    df_map_eqnfields = df_map_eqnfields[eqnheaders]
    for _, row in df_map_eqnfields.iterrows():
        #df_all_fields is the namespace the eval() strings below refer to;
        #rebuilt each pass so earlier results are visible
        df_all_fields = df_atr.join(df_complete_msrs.set_index('ClaimID'),
                                    how='left',
                                    rsuffix='_msr')
        #print('type of row is {}, fieldname is {}, value is {}'.format(type(row), row[eqncol], row))
        parts_true = row[eqncol[0]].split()
        parts_false = row[eqncol[1]].split()
        parts_cond = row[eqncol[2]].split()
        nojoin = False
        for p in parts_true:
            if len(parts_true) == 1:
                eqn_true = p
                nojoin = True
            elif p not in operators and '==' not in p and not is_number(p):
                parts_true[parts_true.index(p)] = "{}['{}']".format(dfname, p)
        if not nojoin:
            eqn_true = ''.join(parts_true)
        nojoin = False
        for p in parts_false:
            if len(parts_false) == 1:
                eqn_false = p
                nojoin = True
            elif p not in operators and '==' not in p and not is_number(p):
                parts_false[parts_false.index(p)] = "{}['{}']".format(dfname, p)
        if not nojoin:
            eqn_false = ''.join(parts_false)
        nojoin = False
        for p in parts_cond:
            if len(parts_cond) == 1:
                eqn_cond = p
                nojoin = True
            elif (p not in operators and '==' not in p and '.' not in p
                  and not is_number(p)):
                #maybe change to startswith in operators if we can use a list
                parts_cond[parts_cond.index(p)] = "{}['{}']".format(dfname, p)
        if not nojoin:
            eqn_cond = ''.join(parts_cond)
        #print('eqn true:{}'.format(eqn_true))
        #print('eqn false:{}'.format(eqn_false))
        #print('eqn cond:{}'.format(eqn_cond))
        #pattern: df['d'] = df['b'].where(df['b'] < 0, df['c'])
        myargs = eqn_true + '.where(' + eqn_cond + ', ' + eqn_false + ')'
        dftmp = df_complete_msrs.set_index('ClaimID')
        dftmp[row[atrcol]] = eval(myargs)
        dftmp = dftmp[row[atrcol]]
        df_atr = df_atr.join(dftmp)
    #print('atr shape with conditionals added : {}'.format(df_atr.shape))
    ###
    #pull in the calculated fields
    df_map_calcfields = df_map[df_map[calccol].notnull()]
    df_map_calcfields = df_map_calcfields[calcheaders]
    #append dependent calcs
    df_map_calc2fields = df_map[df_map[calc2col].notnull()]
    df_map_calc2fields = df_map_calc2fields[calc2headers]
    df_map_calc2fields.columns = calcheaders  #.rename({calc2col: calccol}, inplace=True)
    df_map_calcfields = pd.concat([df_map_calcfields, df_map_calc2fields])
    #loop through
    for _, row in df_map_calcfields.iterrows():
        df_all_fields = df_atr.join(df_complete_msrs.set_index('ClaimID'),
                                    how='left',
                                    rsuffix='_msr')
        parts = row[calccol].split()
        nojoin = False
        for p in parts:
            if len(parts) == 1:
                eqn = p
                nojoin = True
            elif (p not in operators and '==' not in p and '.' not in p
                  and not is_number(p)):
                parts[parts.index(p)] = "{}['{}']".format(dfname, p)
            elif p in stringops:
                parts[parts.index(p)] = ' %s ' % p
        if not nojoin:
            eqn = ''.join(parts)
        nojoin = False
        myargs = eqn
        dftmp = df_complete_msrs.set_index('ClaimID')
        dftmp[row[atrcol]] = eval(myargs)
        dftmp = dftmp[row[atrcol]]
        df_atr = df_atr.join(dftmp)
    #New: appends the last field; delete if trouble
    df_all_fields = df_atr.join(df_complete_msrs.set_index('ClaimID'),
                                how='left',
                                rsuffix='_msr')
    #above ok?
    print('atr shape with everything added : {}'.format(df_atr.shape))
    ## Drop passthru from atr; the dbreview record will stay in place
    #df[df.name != 'Tina']
    df_atr = df_atr[df_atr.EvalNetPassThru != True]
    print('atr shape after drop EvalNetPassThru : {}'.format(df_atr.shape))
    # Pull in db review file
    df_dbrvw = pd.read_csv(params.D0_DATABASE_REVIEW_FILE)
    # Hack to fix string number column. Not needed anymore:
    #df_dbrvw["ExAnteLifecycleNetkWh"] = df_dbrvw["ExAnteLifecycleNetkWh"].str.replace('-', '0')
    #df_dbrvw["ExAnteLifecycleNetkWh"] = df_dbrvw["ExAnteLifecycleNetkWh"].str.replace(',', '').astype(float)
    #Combine with atr
    #dropping all sampled records, then join
    ''' Didn't work
    #df_dbrvwnonsampled = (df_dbrvw.merge(df_atr, on=['ClaimID', 'ClaimId'], how='left', indicator=True)
    df_dbrvwnonsampled = (df_dbrvw.merge(df_atr, how='left', indicator=True)
                          .query('_merge == "left_only"')
                          .drop('_merge', 1))
    '''
    df_dbrvwnonsampled = df_dbrvw[~df_dbrvw.ClaimId.isin(df_atr.index)]
    #print('original shape is {}, after drop it is {}'.format(df_dbrvw.shape, df_dbrvwnonsampled.shape))
    #df_atr.reset_index(inplace=True)
    df_fullatr = pd.concat([df_dbrvwnonsampled.set_index('ClaimId'), df_atr],
                           sort=False)
    try:
        csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\allfieldsatrdata.csv'
        df_fullatr.to_csv(csvfile)
        logging.info(f'wrote csv for {csvfile}')
    except Exception:
        logging.info('file in use, writing to backup')
        df_fullatr.to_csv(params.SAMPLED_SITE_REVIEW_PATH +
                          '\\allfieldsatrdata2.csv')
    #get cols
    df_map_atrfields = df_map[df_map[atrfinalcol].notnull()]
    atrcols = df_map_atrfields[atrfinalcol].tolist()
    #print('final cols are {}'.format(atrcols))
    df_fullatr.index.name = 'ClaimId'
    df_fullatr = df_fullatr.reset_index()
    #df_fullatr.rename(columns={'ClaimID': 'ClaimId'}, inplace=True)
    df_fullatr = df_fullatr[atrcols]
    df_fullatr.set_index('ClaimId', inplace=True)
    #Final data clean up
    #df_fullatr.fillna(0, inplace=True)  #don't turn back on unless we deal with marketeffectsbenefits nulls
    df_fullatr.replace({True: '1', False: '0'}, inplace=True)
    # round output for passthru = 0
    rounding = False
    if rounding:
        df_map_roundfields = df_map[df_map[rndcol].notnull()]
        df_map_roundfields = df_map_roundfields[rndheaders]
        df_map_roundfields.set_index(atrfinalcol, inplace=True)
        df_map_roundfields = df_map_roundfields.astype({rndcol: int})
        df_map_roundfields['types'] = 'float'
        df_fullatr = df_fullatr.astype(df_map_roundfields['types'])
        print('fullatr shape before round: {}'.format(df_fullatr.shape))
        df_fullatr[df_fullatr['EvalNetPassThru'] == 1].round(
            df_map_roundfields[rndcol])
        print('fullatr shape after round: {}'.format(df_fullatr.shape))
        #df_fullatr = sigfigs(df_fullatr)
    print('alldata shape: {}'.format(df_all_fields.shape))
    logging.info('printing atr files')
    try:
        csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\atrdata.csv'
        df_atr.to_csv(csvfile)
        logging.info(f'wrote csv for {csvfile}')
    except Exception:
        logging.info('file in use, writing to backup')
        df_atr.to_csv(params.SAMPLED_SITE_REVIEW_PATH + '\\tmpatr2.csv')
    try:
        csvfile = params.D0_ALL_DATA_FILE
        #csvfile = params.SAMPLED_SITE_REVIEW_PATH + '\\alldata.csv'
        df_all_fields.to_csv(csvfile)
        logging.info(f'wrote csv for {csvfile}')
    except Exception:
        logging.info('file in use, writing to backup')
        df_all_fields.to_csv(params.SAMPLED_SITE_REVIEW_PATH + '\\alldata2.csv')
    try:
        csvfile = params.D0_LOCAL_ATR_OUTPUT_FILE
        df_fullatr.to_csv(csvfile)
        logging.info(f'wrote csv for {csvfile}')
    except Exception:
        logging.info('file in use, writing to backup')
        df_fullatr.to_csv(params.D0_ESPI_PATH + '\\finalatr_backup.csv')
    if WriteFinalFile:
        try:
            csvfile = params.D0_ATR_OUTPUT_FILE
            df_fullatr.to_csv(csvfile)
            logging.info(f'wrote csv for {csvfile}')
        except Exception:
            logging.info('file in use, writing to backup')
            df_fullatr.to_csv(params.D0_ESPI_PATH + '\\actualatr_backup.csv')
    print('yup')

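#Sketch of the eligibility zeroing pattern used in createEvalResults():
#savings keep their value only where every 'any' eligibility field is not
#'No'. The column names here are made up for illustration.
def _demo_eligibility_zeroing():
    df = pd.DataFrame({'svgs': [10.0, 20.0],
                       'elig1': ['Yes', 'No'],
                       'elig2': ['Yes', 'Yes']})
    anyfields = ['elig1', 'elig2']
    df['svgs'] = df['svgs'].where((df[anyfields] != 'No').all(axis=1), 0)
    print(df['svgs'].tolist())  #[10.0, 0.0]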