def instr_val(self, valtypes, op_sids, start_dt_str, end_dt_str,
              fld_varnames=None, ltypes=None, lstages=None,
              run_op_report=False, ip_path=None):

    # Validation data are from field measurements (daily log sheet)
    if fld_varnames:

        query_varnames = ['Barometer Pressure (mmHg)']
        for varname in fld_varnames:
            # Sometimes the user needs to specify a PAIR of variables
            # (eg pressure upstream AND downstream of a pump)
            if type(varname) == tuple:
                query_varnames.append(varname[0])
                query_varnames.append(varname[1])
            # Otherwise just a single variable name
            else:
                query_varnames.append(varname)

        # Clean the query variable names
        query_varnames = [fld.clean_varname(varname) for varname in query_varnames]

        # Query the field data (using clean variable names)
        valdat = fld.get_data()[['Timestamp'] + query_varnames]

        # Create a time variable with minute resolution from the field data Timestamp variable
        valdat['Time'] = pd.to_datetime(valdat['Timestamp']).values.astype('datetime64[m]')

        # Replace missing barometric pressure readings with standard sea-level pressure (760 mmHg)
        valdat.loc[:, 'Barometer_Pressure_mmHg'] = pd.to_numeric(
            valdat['Barometer_Pressure_mmHg'], errors='coerce')
        valdat.loc[np.isnan(valdat['Barometer_Pressure_mmHg']),
                   'Barometer_Pressure_mmHg'] = 760

        # Loop through field variables to convert to numeric and calculate differences (if necessary)
        for varInd, varname in enumerate(fld_varnames):
            if type(varname) == tuple:
                valdat[op_sids[varInd] + 'VAL'] = \
                    pd.to_numeric(valdat[fld.clean_varname(varname[0])], errors='coerce') - \
                    pd.to_numeric(valdat[fld.clean_varname(varname[1])], errors='coerce')
            else:
                valdat[op_sids[varInd] + 'VAL'] = pd.to_numeric(
                    valdat[fld.clean_varname(varname)], errors='coerce')

        valdat = valdat[['Time', 'Barometer_Pressure_mmHg'] +
                        [sid + 'VAL' for sid in op_sids]]

    # Validation data are from lab measurements
    elif ltypes or lstages:

        valdatLong = pd.concat(
            [pld.get_data([ltype])[ltype] for ltype in ltypes], axis=0)
        valdatLong = valdatLong.loc[valdatLong['Stage'].isin(lstages), :]

        # Convert to wide format
        # Calculate the mean by obs_id to account for the possibility of multiple
        # measurements (eg pH) taken for a single sample
        valdatLong = valdatLong.groupby(['Date_Time', 'Stage', 'Type', 'obs_id']).mean()
        valdatWide = valdatLong.unstack(['Type', 'Stage'])
        valdatWide.reset_index(inplace=True)

        valdat = pd.DataFrame(valdatWide['Date_Time'].values, columns=['Time'])
        valdatColnames = [op_sids[lind] + 'VAL' for lind, ltype in enumerate(ltypes)]
        for lind, ltype in enumerate(ltypes):
            valdat[valdatColnames[lind]] = valdatWide['Value'][ltype][lstages[lind]]

        valdat = valdat[['Time'] + valdatColnames]

    # Expand valdat to get a copy of each logged value for every minute from
    # 10 minutes before to 10 minutes after it was entered into the google form
    valdatList = []
    for minDiff in range(-10, 11):
        valdatDiff = valdat.copy()
        valdatDiff['Time'] = valdatDiff['Time'] + timedelta(seconds=minDiff * 60)
        valdatList.append(valdatDiff)
    valdatAll = pd.concat(valdatList, axis=0)

    # Get op data for the element ids whose measurements are being validated
    nsids = len(op_sids)

    # Run the op report if requested (minute level)
    if run_op_report:
        op_run = op.op_data_agg(start_dt_str, end_dt_str, ip_path=ip_path)
        op_run.run_agg(
            valtypes,            # Sensor types (case insensitive: water, gas, pH, conductivity, temp or tmp)
            op_sids,             # Sensor ids to summarize (must exist in the op data file)
            [1] * nsids,         # Number of minutes to average over
            ['MINUTE'] * nsids,  # Type of time period (can be "hour" or "minute")
        )

    # Retrieve data from the SQL file
    opdat = op.get_data(valtypes, op_sids, [1] * nsids, ['MINUTE'] * nsids,
                        combine_all=True,
                        start_dt_str=start_dt_str,
                        end_dt_str=end_dt_str)

    # Merge the op data with the validation data
    valdatMerged = opdat.merge(valdatAll, on='Time', how='inner')

    # Merge all values on a day (since we are validating on a -10 to +10 minute window)
    valdatMerged.loc[:, 'Date'] = valdatMerged['Time'].dt.date

    # Take the average over the time window
    valdatMerged = valdatMerged.groupby('Date').mean()
    valdatMerged.reset_index(inplace=True)
    valdatMerged.loc[:, 'Date'] = pd.to_datetime(valdatMerged['Date'])

    # Loop through each instrument to compute error, and output plots if there is evidence
    # of a significant difference between validated and op values, or of instrument drift over time
    for sind, sid in enumerate(op_sids):

        if valtypes[sind] == 'PRESSURE':
            # Convert barometric pressure readings from mmHg to psi
            valdatMerged.loc[:, 'Barometer_Pressure_mmHg'] = \
                valdatMerged['Barometer_Pressure_mmHg'] * 0.0193368
            # Convert gauge pressure (psi) to inches of water column
            valdatMerged.loc[:, sid] = \
                (valdatMerged[sid] - valdatMerged['Barometer_Pressure_mmHg']) * 27.7076

        # Compute the percentage error (op measurement vs validation)
        valdatMerged.loc[:, 'error'] = \
            (valdatMerged[sid] - valdatMerged[sid + 'VAL']) / valdatMerged[sid + 'VAL']

        # Subset to the element of interest
        valdatSub = valdatMerged.loc[:, ['Date', sid, sid + 'VAL', 'error']]
        valdatSub.replace([np.inf, -np.inf], np.nan, inplace=True)
        valdatSub.dropna(inplace=True)

        # Only continue if there are observations (sometimes there aren't...)
        if valdatSub.size > 0:

            # Convert time to a numeric variable (divide by 10**9*3600*24 to express in days)
            valX = pd.to_numeric(valdatSub.loc[:, 'Date']) / (10**9 * 3600 * 24)

            # Perform a 2-sample t-test for a difference in means
            tStatMeans, pvalMeans = stats.ttest_ind(
                valdatSub[sid].values, valdatSub[sid + 'VAL'].values)

            # Regress error on time to test for drift (valX is in days, so the slope is per day)
            slope, intercept, Rsq, pValTrend, stdErr = stats.linregress(
                valX, valdatSub['error'].values)

            # If drift is significant at the 10% level, or if the means are significantly
            # different, produce a plot with a warning
            if pValTrend < 0.1 or pvalMeans < 0.1:

                fig, ax = plt.subplots(1, 1)
                gs1 = gridspec.GridSpec(1, 1)
                fig.subplots_adjust(top=0.90, right=0.7)
                title = fig.suptitle('Instrument Validation: {0}'.format(sid),
                                     fontweight='bold', fontsize=12, y=0.99)

                dates = [pd.to_datetime(date) for date in valdatSub['Date'].dt.date.values]
                measure = ax.scatter(dates, valdatSub[sid], marker='o')
                validated = ax.scatter(dates, valdatSub[sid + 'VAL'], color='r', marker='o')
                ax.text(0.8, 0.15,
                        'p-Value (Trend): {0}'.format(round(pValTrend, 3)),
                        bbox=dict(facecolor='black', alpha=0.1),
                        transform=ax.transAxes)
                ax.text(0.8, 0.05,
                        'p-Value (Diff.): {0}'.format(round(pvalMeans, 3)),
                        bbox=dict(facecolor='black', alpha=0.1),
                        transform=ax.transAxes)
                plt.xlim(min(dates) - timedelta(days=1), max(dates) + timedelta(days=1))
                plt.xticks(rotation=45)
                lgd = ax.legend(('op Value', 'Validated Measure'),
                                loc='center left',
                                bbox_to_anchor=(0.75, 0.90),
                                fancybox=True)
                plt.tight_layout()

                # Output the plot to the directory of choice
                plot_filename = "InstrumentValidation_{0}.png".format(sid)
                fig = matplotlib.pyplot.gcf()
                fig.set_size_inches(10, 5)
                plt.savefig(os.path.join(self.outdir, plot_filename),
                            bbox_extra_artists=(lgd, title))
                plt.close()

    return
def get_cod_bal(self, end_dt_str, nweeks, plot=True, table=True):

    # Window (in days) for the moving average calculation
    ma_win = 1
    end_dt = dt.strptime(end_dt_str, '%m-%d-%y').date()
    start_dt = end_dt - timedelta(days=7 * nweeks)
    start_dt_str = dt.strftime(start_dt, '%m-%d-%y')
    start_dt_query = start_dt - timedelta(days=ma_win)
    start_dt_qstr = dt.strftime(start_dt_query, '%m-%d-%y')

    # op element IDs for gas, temperature and influent/effluent flow meters
    gas_sids = ['FT700', 'FT704']
    temp_sids = ['AT304', 'AT310']
    inf_sid = 'FT202'
    eff_sid = 'FT305'

    # Length of time period for which data are being queried
    perLen = 1
    # Type of time period for which data are being queried
    tperiod = 'HOUR'

    # Unit conversions
    l_p_gal = 3.78541  # Liters per gallon
    Vol_STP = 22.4     # Liters in a mol of gas at STP

    #=========================================> op DATA <=========================================

    # If requested, run the op_data_agg script for the reactor meters and time period of interest
    if self.run_agg_feeding or self.run_agg_gasprod or self.run_agg_temp:

        get_op = op_run(start_dt_str, end_dt_str, ip_path=self.ip_path)

        if self.run_agg_feeding:
            get_op.run_agg(
                ['water'] * 2,        # Sensor types (case insensitive: water, gas, pH, conductivity, temp or tmp)
                [inf_sid, eff_sid],   # Sensor ids to summarize (must exist in the op data file)
                [perLen] * 2,         # Number of hours to average over
                [tperiod] * 2,        # Type of time period (can be "hour" or "minute")
            )
        if self.run_agg_gasprod:
            get_op.run_agg(
                ['GAS'] * len(gas_sids),
                gas_sids,
                [perLen] * len(gas_sids),
                [tperiod] * len(gas_sids),
            )
        if self.run_agg_temp:
            get_op.run_agg(
                ['TEMP'] * len(temp_sids),
                temp_sids,
                [perLen] * len(temp_sids),
                [tperiod] * len(temp_sids),
            )

    # Read in the gas production data
    gasprod_dat = op.get_data(['GAS'] * 2,
                              gas_sids,
                              [perLen] * len(gas_sids),
                              [tperiod] * len(gas_sids),
                              combine_all=True,
                              start_dt_str=start_dt_str,
                              end_dt_str=end_dt_str)
    # Do the same for feeding and temperature
    feeding_dat = op.get_data(['WATER'] * 2,
                              [inf_sid, eff_sid],
                              [perLen] * 2,
                              [tperiod] * 2,
                              combine_all=True,
                              start_dt_str=start_dt_str,
                              end_dt_str=end_dt_str)
    temp_dat = op.get_data(['TEMP'] * 2,
                           temp_sids,
                           [perLen] * len(temp_sids),
                           [tperiod] * len(temp_sids),
                           combine_all=True,
                           start_dt_str=start_dt_str,
                           end_dt_str=end_dt_str)

    # Prep the gas production op data
    gasprod_dat['Meas Biogas Prod'] = (gasprod_dat['FT700'] + gasprod_dat['FT704']) * 60 * perLen
    gasprod_dat['Date'] = gasprod_dat['Time'].dt.date
    gasprod_dat_cln = gasprod_dat[['Date', 'Meas Biogas Prod']]
    gasprod_dat_cln = gasprod_dat_cln.groupby('Date').sum()
    gasprod_dat_cln.reset_index(inplace=True)

    # Feeding op data
    feeding_dat['Flow In'] = feeding_dat[inf_sid] * 60 * perLen * l_p_gal
    feeding_dat['Flow Out'] = feeding_dat[eff_sid] * 60 * perLen * l_p_gal
    feeding_dat['Date'] = feeding_dat['Time'].dt.date
    feeding_dat_cln = feeding_dat[['Date', 'Flow In', 'Flow Out']]
    feeding_dat_cln = feeding_dat_cln.groupby('Date').sum()
    feeding_dat_cln.reset_index(inplace=True)

    # Reactor temperature op data (volume-weighted average of the two reactors)
    temp_dat['Reactor Temp (C)'] = \
        (temp_dat['AT304'] * self.afbr_vol + temp_dat['AT310'] * self.afmbr_vol) / self.react_vol
    temp_dat['Date'] = temp_dat['Time'].dt.date
    temp_dat_cln = temp_dat[['Date', 'Reactor Temp (C)']]
    temp_dat_cln = temp_dat_cln.groupby('Date').mean()
    temp_dat_cln.reset_index(inplace=True)

    # List of op dataframes
    op_dflist = [feeding_dat_cln, gasprod_dat_cln, temp_dat_cln]
    # Merge op datasets
    opdat_ud = functools.reduce(
        lambda left, right: pd.merge(left, right, on='Date', how='outer'), op_dflist)

    #=========================================> op DATA <=========================================

    #=========================================> LAB DATA <=========================================

    # Get lab data from file on box and filter to desired dates
    labdat = pld.get_data(['COD', 'TSS_VSS', 'SULFATE', 'GASCOMP'])

    # COD data
    cod_dat = labdat['COD']
    cod_dat['Date'] = cod_dat['Date_Time'].dt.date
    # Drop duplicates
    cod_dat.drop_duplicates(keep='first', inplace=True)
    # Get the average of multiple values taken on the same day
    cod_dat = cod_dat.groupby(['Date', 'Stage', 'Type']).mean()
    # Convert to wide to get COD in and out of the reactors
    cod_dat_wide = cod_dat.unstack(['Stage', 'Type'])
    cod_dat_wide['CODt MS'] = cod_dat_wide['Value']['Microscreen']['Total']
    # Volume-weighted average COD concentration in the reactors
    cod_dat_wide['CODt R'] = \
        (cod_dat_wide['Value']['AFBR']['Total'] * self.afbr_vol +
         cod_dat_wide['Value']['Duty AFMBR MLSS']['Total'] * self.afmbr_vol) / \
        self.react_vol
    cod_dat_wide['CODt Out'] = cod_dat_wide['Value']['Duty AFMBR Effluent']['Total']
    cod_dat_wide.reset_index(inplace=True)
    cod_dat_cln = cod_dat_wide[['Date', 'CODt MS', 'CODt R', 'CODt Out']]
    cod_dat_cln.columns = ['Date', 'CODt MS', 'CODt R', 'CODt Out']

    # Gas composition data
    gc_dat = labdat['GASCOMP']
    gc_dat['Date'] = gc_dat['Date_Time'].dt.date
    gc_dat = gc_dat.loc[gc_dat['Type'].isin(['Methane (%)', 'Carbon Dioxide (%)'])]
    gc_dat = gc_dat.groupby(['Date', 'Type']).mean()
    gc_dat_wide = gc_dat.unstack('Type')
    gc_dat_wide['CH4%'] = gc_dat_wide['Value']['Methane (%)']
    gc_dat_wide['CO2%'] = gc_dat_wide['Value']['Carbon Dioxide (%)']
    gc_dat_wide.reset_index(inplace=True)
    gc_dat_cln = gc_dat_wide[['Date', 'CH4%', 'CO2%']]
    gc_dat_cln.columns = ['Date', 'CH4%', 'CO2%']

    # VSS data
    vss_dat = labdat['TSS_VSS']
    vss_dat['Date'] = vss_dat['Date_Time'].dt.date
    # Drop duplicates
    vss_dat.drop_duplicates(keep='first', inplace=True)
    # Get the average of multiple values taken on the same day
    vss_dat = vss_dat.groupby(['Date', 'Stage', 'Type']).mean()
    # Convert to wide to get VSS in and out of the reactors
    vss_dat_wide = vss_dat.unstack(['Stage', 'Type'])
    # Volume-weighted average VSS concentration in the reactors
    vss_dat_wide['VSS R'] = \
        (vss_dat_wide['Value']['AFBR']['VSS'] * self.afbr_vol +
         vss_dat_wide['Value']['Duty AFMBR MLSS']['VSS'] * self.afmbr_vol) / \
        (self.afbr_vol + self.afmbr_vol)
    vss_dat_wide['VSS Out'] = vss_dat_wide['Value']['Duty AFMBR Effluent']['VSS']
    vss_dat_wide.reset_index(inplace=True)
    vss_dat_cln = vss_dat_wide[['Date', 'VSS R', 'VSS Out']]
    vss_dat_cln.columns = ['Date', 'VSS R', 'VSS Out']

    # Solids wasting data
    waste_dat = fld.get_data(['AFMBR_Volume_Wasted_Gal'])
    waste_dat['Date'] = pd.to_datetime(waste_dat['Timestamp']).dt.date
    waste_dat['AFMBR Volume Wasted (Gal)'] = waste_dat['AFMBR_Volume_Wasted_Gal'].astype('float')
    waste_dat['Wasted (L)'] = waste_dat['AFMBR Volume Wasted (Gal)'] * l_p_gal
    waste_dat_cln = waste_dat[['Date', 'Wasted (L)']]

    # Sulfate data
    so4_dat = labdat['SULFATE']
    so4_dat['Date'] = so4_dat['Date_Time']
    so4_dat = so4_dat.groupby(['Date', 'Stage']).mean()
    so4_dat_wide = so4_dat.unstack(['Stage'])
    so4_dat_wide['SO4 MS'] = so4_dat_wide['Value']['Microscreen']
    so4_dat_wide.reset_index(inplace=True)
    so4_dat_cln = so4_dat_wide[['Date', 'SO4 MS']]
    so4_dat_cln.columns = ['Date', 'SO4 MS']
    so4_dat_cln.loc[:, 'Date'] = so4_dat_cln['Date'].dt.date

    # List of lab dataframes
    lab_dflist = [cod_dat_cln, gc_dat_cln, waste_dat_cln, so4_dat_cln, vss_dat_cln]
    # Merge lab datasets
    labdat = functools.reduce(
        lambda left, right: pd.merge(left, right, on='Date', how='outer'), lab_dflist)
    # Get the daily average of readings if there are multiple readings in a day
    # (also prevents merging issues!)
    labdat_ud = labdat.groupby('Date').mean()
    labdat_ud.reset_index(inplace=True)

    #=========================================> LAB DATA <=========================================

    #=======================================> MERGE & PREP <=======================================

    # Merge lab and op data
    cod_bal_dat = labdat_ud.merge(opdat_ud, on='Date', how='outer')
    # Dedupe (merging many files, so any duplicates can cause big problems!)
    cod_bal_dat.drop_duplicates(inplace=True)

    # Convert missing wasting data to 0 (assume no solids wasted that day)
    cod_bal_dat.loc[np.isnan(cod_bal_dat['Wasted (L)']), 'Wasted (L)'] = 0

    # Fill in missing lab data
    # First get the means of the observed data
    cod_bal_means = \
        cod_bal_dat[[
            'CH4%', 'CO2%',
            'CODt MS', 'CODt R', 'CODt Out',
            'VSS R', 'VSS Out',
            'SO4 MS'
        ]].mean()
    # Then interpolate
    cod_bal_dat.sort_values(['Date'], inplace=True)
    cod_bal_dat.set_index('Date', inplace=True)
    cod_bal_dat[[
        'CH4%', 'CO2%',
        'CODt MS', 'CODt R', 'CODt Out',
        'VSS R', 'VSS Out',
        'SO4 MS'
    ]] = \
        cod_bal_dat[[
            'CH4%', 'CO2%',
            'CODt MS', 'CODt R', 'CODt Out',
            'VSS R', 'VSS Out',
            'SO4 MS'
        ]].interpolate()
    # Then fill remaining missing values with the means of all variables
    fill_values = {
        'CH4%': cod_bal_means['CH4%'],
        'CO2%': cod_bal_means['CO2%'],
        'CODt MS': cod_bal_means['CODt MS'],
        'CODt R': cod_bal_means['CODt R'],
        'CODt Out': cod_bal_means['CODt Out'],
        'VSS R': cod_bal_means['VSS R'],
        'VSS Out': cod_bal_means['VSS Out'],
        'SO4 MS': cod_bal_means['SO4 MS']
    }
    cod_bal_dat.fillna(value=fill_values, inplace=True)

    # Get the moving average of COD in the reactors (the data bounce around a lot)
    cod_cols = ['CODt MS', 'CODt R', 'CODt Out']
    cod_bal_dat[cod_cols] = cod_bal_dat[cod_cols].rolling(ma_win).mean()

    # Reset index
    cod_bal_dat.reset_index(inplace=True)

    # Put dates into weekly bins (relative to end date), denoted by the beginning of the week
    cod_bal_dat['Weeks Back'] = \
        pd.to_timedelta(
            np.floor((cod_bal_dat['Date'] - end_dt) / np.timedelta64(7, 'D')) * 7, unit='D')
    cod_bal_dat['Week Start'] = pd.to_datetime(end_dt) + cod_bal_dat['Weeks Back']
    cod_bal_dat = cod_bal_dat.loc[(cod_bal_dat['Date'] >= start_dt) &
                                  (cod_bal_dat['Date'] <= end_dt), :]

    #=======================================> MERGE & PREP <=======================================

    #========================================> COD Balance <=======================================
    # Note: dividing by 1E6 to express in kg
    # COD coming in from the Microscreen
    cod_bal_dat['COD In'] = cod_bal_dat['CODt MS'] * cod_bal_dat['Flow In'] / 1E6
    # COD leaving the reactor
    cod_bal_dat['COD Out'] = cod_bal_dat['CODt Out'] * cod_bal_dat['Flow Out'] / 1E6
    # COD wasted
    cod_bal_dat['COD Wasted'] = cod_bal_dat['CODt R'] * cod_bal_dat['Wasted (L)'] / 1E6
    # COD content of the gas (assumes that the volume given by the flowmeter is at STP)
    cod_bal_dat['Biogas'] = \
        cod_bal_dat['Meas Biogas Prod'] * cod_bal_dat['CH4%'] / 100 / Vol_STP * 64 / 1000
    # COD content of dissolved methane (estimated from the temperature of the reactors)
    cod_diss_conc = map(self.est_diss_ch4,
                        cod_bal_dat['Reactor Temp (C)'].values,
                        cod_bal_dat['CH4%'].values)
    cod_bal_dat['Dissolved CH4'] = \
        np.array(list(cod_diss_conc)) * cod_bal_dat['Flow Out'] / 1E6
    # COD consumed by sulfate reduction (1 g COD per 1.5 g SO4; readings are in mg/L as S)
    cod_bal_dat['Sulfate Reduction'] = \
        cod_bal_dat['SO4 MS'] * cod_bal_dat['Flow In'] / 1.5 / 1E6 * 48 / 16
    #========================================> COD Balance <=======================================

    # Convert to weekly data
    cod_bal_wkly = cod_bal_dat.groupby('Week Start').sum(numeric_only=True)
    cod_bal_wkly.reset_index(inplace=True)
    cod_bal_wkly.loc[:, 'Week Start'] = cod_bal_wkly['Week Start'].dt.date
    cod_bal_wkly = cod_bal_wkly.loc[cod_bal_wkly['Week Start'] < end_dt, :]

    #===========================================> Plot! <==========================================
    if plot:

        fig, ax = plt.subplots()
        title = fig.suptitle('Weekly COD Mass Balance',
                             fontsize=14, fontweight='bold', y=0.95)
        nWeeks = np.arange(len(cod_bal_wkly))
        bWidth = 0.8
        pBiogas = plt.bar(nWeeks, cod_bal_wkly['Biogas'], bWidth)
        bottomCum = cod_bal_wkly['Biogas'].values
        pOut = plt.bar(nWeeks, cod_bal_wkly['COD Out'], bWidth, bottom=bottomCum)
        bottomCum += cod_bal_wkly['COD Out']
        pDiss = plt.bar(nWeeks, cod_bal_wkly['Dissolved CH4'], bWidth, bottom=bottomCum)
        bottomCum += cod_bal_wkly['Dissolved CH4']
        pWasted = plt.bar(nWeeks, cod_bal_wkly['COD Wasted'], bWidth, bottom=bottomCum)
        bottomCum += cod_bal_wkly['COD Wasted']
        pSO4 = plt.bar(nWeeks, cod_bal_wkly['Sulfate Reduction'], bWidth, bottom=bottomCum)
        pIn = plt.scatter(nWeeks, cod_bal_wkly['COD In'], c='r')
        plt.xticks(nWeeks, cod_bal_wkly['Week Start'], rotation=45)
        lgd = ax.legend(
            # (pIn, pBiogas[0], pOut[0], pDiss[0], pWasted[0], pSO4[0]),
            (pIn, pSO4[0], pWasted[0], pDiss[0], pOut[0], pBiogas[0]),
            ('COD In', 'Sulfate Reduction', 'Solids Wasting',
             'Dissolved CH4', 'COD Out', 'Biogas'),
            loc='center left',
            bbox_to_anchor=(1, 0.5),
            fancybox=True,
            shadow=True,
            ncol=1)
        plt.ylabel('kg of COD Equivalents', fontweight='bold')
        plt.xlabel('Week Start Date', fontweight='bold')
        plt.savefig(os.path.join(self.outdir, 'COD Balance.png'),
                    bbox_extra_artists=(lgd, title),
                    bbox_inches='tight')
        plt.close()
    #===========================================> Plot! <==========================================

    self.cod_bal_wkly = cod_bal_wkly

    if table:
        cod_bal_wkly[[
            'Week Start', 'COD In', 'COD Out', 'Biogas',
            'COD Wasted', 'Dissolved CH4', 'Sulfate Reduction'
        ]].to_csv(os.path.join(self.outdir, 'COD Balance.csv'),
                  index=False,
                  encoding='utf-8')
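
# For reference, the 'Biogas' term above converts a measured gas volume to kg of COD
# equivalents: L biogas * (CH4%/100) gives L CH4, dividing by 22.4 L/mol (STP) gives
# mol CH4, multiplying by 64 g COD/mol CH4 gives g COD, and /1000 gives kg. A minimal
# standalone sketch of the same arithmetic (the helper name is illustrative, not part
# of this module):
#
#   def biogas_cod_kg(biogas_l, ch4_pct, vol_stp=22.4):
#       """kg of COD equivalents in a volume of biogas (assumed to be at STP)."""
#       mol_ch4 = biogas_l * ch4_pct / 100 / vol_stp
#       return mol_ch4 * 64 / 1000
#
#   biogas_cod_kg(1000, 60)  # ~1.7 kg COD for 1000 L of biogas at 60% CH4
#
# An illustrative call producing the stacked-bar plot and CSV for the last 8 weeks
# (instance name hypothetical):
#
#   cr2c_vl.get_cod_bal('06-01-18', nweeks=8, plot=True, table=True)
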
def get_series(dclass, dtype, time_resolution, time_order, start_date, end_date,
               stages, types, sids, plotFormat):

    groupVars = ['Time']
    series = []
    dflist = []
    seriesNamePrefix = plotFormat['seriesNamePrefix']

    if dclass == 'Lab Data':

        df = lab_data[dtype]
        df.loc[:, 'Time'] = df['Date_Time']
        df.loc[:, 'yvar'] = df['Value']

        if stages:
            df = df[df['Stage'].isin(stages)]
            groupVars.append('Stage')
        else:
            stages = [None]
        if types:
            df = df[df['Type'].isin(types)]
            groupVars.append('Type')
        else:
            types = [None]

        # Average all measurements taken for a given sample
        df = df.groupby(groupVars).mean()
        df.reset_index(inplace=True)

        for stage in stages:
            for type_ in types:
                if stage and type_:
                    dfsub = df[(df['Type'] == type_) & (df['Stage'] == stage)]
                    seriesName = seriesNamePrefix + type_ + '-' + stage
                elif stage:
                    dfsub = df[df['Stage'] == stage]
                    seriesName = seriesNamePrefix + stage
                elif type_:
                    dfsub = df[df['Type'] == type_]
                    seriesName = seriesNamePrefix + type_
                else:
                    continue

                subSeries = {'seriesName': seriesName}
                subSeries['data'] = filter_resolve_time(
                    dfsub, dtype, time_resolution, time_order, start_date, end_date)
                dflist += [subSeries]

    if dclass == 'Operational Data':

        for sind, sid in enumerate(sids):

            # Retrieve data
            try:
                # Try querying hourly data
                dfsub = op.get_data([dtype], [sid], [1], ['HOUR'])
            except:
                # Otherwise the data are only available at minute resolution:
                # load the minute data and aggregate to hourly means
                dfsub = op.get_data([dtype], [sid], [1], ['MINUTE'])
                dfsub.loc[:, 'Time'] = dfsub['Time'].values.astype('datetime64[h]')
                dfsub = dfsub.groupby('Time').mean()
                dfsub.reset_index(inplace=True)

            # Convert per-minute flow readings to hourly volumes
            if dtype in ['GAS', 'WATER']:
                dfsub.loc[:, [sid]] = dfsub[sid] * 60

            dfsub.loc[:, 'yvar'] = dfsub[sid]
            seriesName = seriesNamePrefix + sid
            subSeries = {'seriesName': seriesName}
            subSeries['data'] = filter_resolve_time(
                dfsub, dtype, time_resolution, time_order, start_date, end_date)
            dflist += [subSeries]

    for df in dflist:
        for dfsub in df['data']:
            series.append(
                go.Scatter(
                    x=dfsub['data']['Time'],
                    y=dfsub['data']['yvar'],
                    mode=plotFormat['mode'],
                    opacity=0.8,
                    marker={
                        'size': plotFormat['size'],
                        'line': {'width': 0.5, 'color': 'white'},
                        'symbol': plotFormat['symbol'],
                    },
                    line={'dash': plotFormat['dash']},
                    name=df['seriesName'] + dfsub['timeSuffix'],
                    xaxis='x1',
                    yaxis=plotFormat['yaxis']))

    return series
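
# Illustrative use (a sketch; argument values are hypothetical). get_series returns a
# list of plotly Scatter traces, one per series/time-resolution combination, which can
# be assembled into a figure directly. The plotFormat keys mirror the ones read above.
#
#   plotFormat = {
#       'seriesNamePrefix': 'COD: ',
#       'mode': 'lines+markers',
#       'size': 6,
#       'symbol': 'circle',
#       'dash': 'solid',
#       'yaxis': 'y1',
#   }
#   traces = get_series(
#       'Lab Data', 'COD', time_resolution, time_order, start_date, end_date,
#       stages=['Microscreen', 'Duty AFMBR Effluent'], types=['Total'],
#       sids=None, plotFormat=plotFormat
#   )
#   fig = go.Figure(data=traces)
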