def process_data(tableName='DailyLogResponses'):
    """
    Pull the daily field log data from Google Sheets and load it into the
    'cr2c_fielddata.db' SQLite database, creating the table if necessary.

    :param tableName: name of both the gsheet tab and the SQL table
        (default 'DailyLogResponses')
    """
    # Get the log data from gsheets
    logdata = cut.get_gsheet_data(tableName)

    # Eliminate special characters (':-?[]()') and replace spaces with '_'
    colnamesRaw = logdata.columns.values
    colnamesCln = [clean_varname(colname) for colname in colnamesRaw]
    # Replace column names of dataset with clean column names
    logdata.columns = colnamesCln

    # SQL command strings for sqlite3
    # NOTE(review): assumes the first cleaned column is the form Timestamp,
    # which becomes the table's primary key — confirm against the gsheet
    colnamesStr = ','.join(colnamesCln[1:])
    colInsStr = ','.join(['?'] * len(colnamesCln))
    create_str = """
        CREATE TABLE IF NOT EXISTS {0} (Timestamp CHAR PRIMARY KEY, {1})
    """.format(tableName, colnamesStr)
    ins_str = """
        INSERT OR REPLACE INTO {0} (Timestamp,{1})
        VALUES ({2})
    """.format(tableName, colnamesStr, colInsStr)

    # Change to data directory
    data_dir = cut.get_dirs()[0]
    os.chdir(data_dir)

    # Create SQL object connection
    conn = sqlite3.connect('cr2c_fielddata.db')
    # Fix: guarantee the connection is closed even if an execute fails
    # (the original leaked it on any exception)
    try:
        # Create table if it doesn't exist
        conn.execute(create_str)
        # Insert aggregated values for the sid and time period
        conn.executemany(ins_str, logdata.to_records(index=False).tolist())
        conn.commit()
    finally:
        conn.close()
def get_data(varNames = None, start_dt_str = None, end_dt_str = None, output_csv = False, outdir = None):
    """
    Retrieve field log data from the 'cr2c_fielddata.db' SQLite database.

    :param varNames: optional list of variable names to select (upper-cased
        before querying); if None, all columns are returned
    :param start_dt_str: optional inclusive start date, '%m-%d-%y'
    :param end_dt_str: optional inclusive end date, '%m-%d-%y'
    :param output_csv: if True, also write the result to csv in outdir
    :param outdir: directory for the csv output (required if output_csv)
    :return: pandas DataFrame of field data from both log tables
    """
    # Convert date string inputs to dt variables
    if start_dt_str:
        start_dt = dt.strptime(start_dt_str, '%m-%d-%y')
    if end_dt_str:
        end_dt = dt.strptime(end_dt_str, '%m-%d-%y')

    tableNames = ['DailyLogResponses', 'DailyLogResponsesV2']

    # Load data from SQL
    data_dir = cut.get_dirs()[0]
    os.chdir(data_dir)
    conn = sqlite3.connect('cr2c_fielddata.db')

    if varNames:
        varNames = [varName.upper() for varName in varNames]
        varNamesAll = 'Timestamp,' + ','.join(varNames)
    else:
        varNamesAll = '*'

    fielddata = pd.DataFrame([])
    for tableName in tableNames:
        fielddata = pd.concat(
            [
                fielddata,
                pd.read_sql(
                    'SELECT {0} FROM {1}'.format(varNamesAll, tableName),
                    conn,
                    coerce_float = True
                )
            ],
            axis = 0,
            join = 'outer'
        )

    # Fix: close the connection once reading is done (original leaked it)
    conn.close()

    # Dedupe data (some issue with duplicates)
    fielddata.drop_duplicates(inplace = True)

    # Convert Timestamp variable to a pd datetime and eliminate missing values
    fielddata['Timestamp'] = pd.to_datetime(fielddata['Timestamp'])
    fielddata.dropna(subset = ['Timestamp'], inplace = True)

    # BUG FIX: the original filtered on 'Date_Time', a column that does not
    # exist in this frame — the datetime column here is 'Timestamp'
    # (converted just above), so the date filters would raise a KeyError.
    if start_dt_str:
        fielddata = fielddata.loc[fielddata['Timestamp'] >= start_dt, :]
    if end_dt_str:
        # +1 day so that the end date itself is fully included
        fielddata = fielddata.loc[fielddata['Timestamp'] <= end_dt + timedelta(days = 1), :]

    # Output csv if desired
    if output_csv:
        if varNames:
            op_fname = '{0}.csv'.format(','.join(varNames))
        else:
            op_fname = 'cr2c-fieldData.csv'
        os.chdir(outdir)
        fielddata.to_csv(op_fname, index = False, encoding = 'utf-8')

    return fielddata
def __init__(self, verbose = False):
    """Initialize lab-data settings: known lab types, date floor, file-date strings, and directories."""
    # The set of lab data types this class knows how to handle
    self.ltype_list = [
        'PH', 'COD', 'TSS_VSS', 'ALKALINITY', 'VFA',
        'GASCOMP', 'AMMONIA', 'SULFATE', 'TKN', 'BOD',
    ]
    # Earliest feasible date for lab data
    date_fmt = '%m-%d-%y'
    self.min_feas_dt = dt.strptime('6-1-16', date_fmt)
    # Run timestamp, kept both as a datetime and as a formatted string
    self.file_dt = dt.now()
    self.file_dt_str = dt.strftime(self.file_dt, date_fmt)
    # Directory layout comes from the shared utils module
    self.data_dir, self.pydir = cut.get_dirs()
    self.log_dir = os.path.join(self.data_dir, 'Logs')
    self.verbose = verbose
def get_table_names():
    """
    Return the names of all tables in the 'cr2c_opdata.db' SQLite database.

    :return: list of table-name strings
    """
    # Create connection to SQL database
    data_dir = cut.get_dirs()[0]
    os.chdir(data_dir)
    conn = sqlite3.connect('cr2c_opdata.db')
    # Fix: guarantee the connection is closed (the original never closed it)
    try:
        cursor = conn.cursor()
        # Query the sqlite schema catalog for table names
        cursor.execute("""
            SELECT name FROM sqlite_master WHERE type ='table'""")
        return [names[0] for names in cursor.fetchall()]
    finally:
        conn.close()
def get_data(
    ltypes,
    start_dt_str = None,
    end_dt_str = None,
    output_csv = False,
    outdir = None
):
    """
    Retrieve lab data of the requested types from 'cr2c_labdata.db'.

    :param ltypes: list of lab data type names (any entry containing
        'TSS' or 'VSS' is normalized to 'TSS_VSS')
    :param start_dt_str: optional inclusive start date, '%m-%d-%y'
    :param end_dt_str: optional inclusive end date, '%m-%d-%y'
    :param output_csv: if True, also write each type's data to csv in outdir
    :param outdir: directory for csv output (required if output_csv)
    :return: dict mapping lab type name -> long-format pandas DataFrame
    """
    # Convert date string inputs to dt variables
    if start_dt_str:
        start_dt = dt.strptime(start_dt_str, '%m-%d-%y')
    if end_dt_str:
        end_dt = dt.strptime(end_dt_str, '%m-%d-%y')

    # Load data from SQL
    data_dir = cut.get_dirs()[0]
    os.chdir(data_dir)
    conn = sqlite3.connect('cr2c_labdata.db')

    # Loop through types of lab data types (ltypes)
    ldata_all = {}
    # Fix: guarantee the connection is closed even if a query fails
    # (the original never closed it)
    try:
        for ltype in ltypes:
            # Clean user input wrt TSS_VSS
            if ltype.find('TSS') >= 0 or ltype.find('VSS') >= 0:
                ltype = 'TSS_VSS'

            ldata_long = pd.read_sql(
                'SELECT * FROM {0}'.format(ltype),
                conn,
                coerce_float = True
            )

            # Dedupe data (some issue with duplicates)
            ldata_long.drop_duplicates(inplace = True)

            # Convert Date_Time variable to a pd datetime and eliminate missing values
            ldata_long.loc[:, 'Date_Time'] = pd.to_datetime(ldata_long['Date_Time'])
            ldata_long.dropna(subset = ['Date_Time'], inplace = True)

            # Drop the internal dedupe key and filter to desired dates
            ldata_long.drop('DKey', axis = 1, inplace = True)
            if start_dt_str:
                ldata_long = ldata_long.loc[ldata_long['Date_Time'] >= start_dt, :]
            if end_dt_str:
                # +1 day so that the end date itself is fully included
                ldata_long = ldata_long.loc[ldata_long['Date_Time'] <= end_dt + timedelta(days = 1), :]

            # Output csv if desired
            if output_csv:
                os.chdir(outdir)
                ldata_long.to_csv(ltype + '.csv', index = False, encoding = 'utf-8')

            # Write to dictionary
            ldata_all[ltype] = ldata_long
    finally:
        conn.close()

    return ldata_all
import cr2c_fielddata as fld import cr2c_validation as val import cr2c_utils as cut ## Dash/Plotly import dash import dash_html_components as html import dash_core_components as dcc import plotly.graph_objs as go from dash.dependencies import Input, Output, State, Event from flask_caching import Cache # Initialize dash app app = dash.Dash(__name__) # Initialize flask cache cache_dir = os.path.join(cut.get_dirs()[0], 'flask-cache') cacheConfig = { # Probably preferable to use redis in the future 'CACHE_TYPE': 'filesystem', 'CACHE_DIR': cache_dir } cache = Cache() timeout = 300 cache.init_app(app.server, config=cacheConfig) app.config['suppress_callback_exceptions'] = True app.css.config.serve_locally = True app.scripts.config.serve_locally = True #================= Create datetimea layout map from existing data =================# lab_types = [
def get_data(stypes, sids, tperiods, ttypes, combine_all=True, year_sub=None, month_sub=None,
             start_dt_str=None, end_dt_str=None, output_csv=False, outdir=None):
    """
    Retrieve aggregated operational sensor data from 'cr2c_opdata.db'.

    :param stypes/sids/tperiods/ttypes: parallel lists selecting the
        {stype}_{sid}_{tperiod}_{ttype}_AVERAGES tables to read
    :param combine_all: if True, merge all series on 'Time' into one wide
        DataFrame (one column per sid); otherwise return a dict keyed by
        the averages table name
    :param year_sub/month_sub: optional Year/Month values appended to the
        query as a WHERE clause
    :param start_dt_str: optional inclusive start date, '%m-%d-%y'
    :param end_dt_str: optional inclusive end date, '%m-%d-%y'
    :param output_csv: if True (and combine_all), write the result to csv
    :param outdir: directory for csv output (required if output_csv)
    :return: DataFrame (combine_all) or dict of DataFrames
    """
    # Convert date string inputs to dt variables; defaults span full history
    start_dt = dt.strptime('5-10-17', '%m-%d-%y')
    end_dt = dt.now()
    if start_dt_str:
        start_dt = dt.strptime(start_dt_str, '%m-%d-%y')
    if end_dt_str:
        end_dt = dt.strptime(end_dt_str, '%m-%d-%y')

    # Find Operational Data directory and change working directory
    data_dir = cut.get_dirs()[0]
    os.chdir(data_dir)

    # Initialize output data variable
    if combine_all:
        opdata_all = pd.DataFrame()
    else:
        opdata_all = {}

    # Manage data selection input: the four selector lists must be parallel
    nsids = len(sids)
    if nsids != len(stypes) or nsids != len(tperiods) or nsids != len(ttypes):
        print(
            'Error in cr2c_opdata: get_data: The lengths of the sids, stypes, tperiods and ttypes arguments must be equal'
        )
        sys.exit()

    # Manage month and year subset input (will be added to sqlite3 query string).
    # SQLite identifiers are case-insensitive, so 'Year' here matches the
    # original's 'YEAR' behavior while staying consistent with the other branches.
    sub_ins = ''
    if year_sub and month_sub:
        sub_ins = 'WHERE Year == {} AND Month == {}'.format(year_sub, month_sub)
    elif month_sub:
        sub_ins = 'WHERE Month == {}'.format(month_sub)
    elif year_sub:
        sub_ins = 'WHERE Year == {}'.format(year_sub)

    # Fix: open the connection ONCE — the original opened a fresh connection
    # on every loop iteration and never closed any of them — and guarantee
    # it is closed when done
    conn = sqlite3.connect('cr2c_opdata.db')
    try:
        for sid, stype, tperiod, ttype in zip(sids, stypes, tperiods, ttypes):
            sql_str = """
                SELECT distinct * FROM {0}_{1}_{2}_{3}_AVERAGES {4}
                order by Time
            """.format(stype, sid, tperiod, ttype, sub_ins)

            # Read table to pandas dataframe
            opdata = pd.read_sql(sql_str, conn, coerce_float=True)

            # Format the time variable and truncate it to the aggregation period
            opdata['Time'] = pd.to_datetime(opdata['Time'])
            if ttype == 'HOUR':
                opdata.loc[:, 'Time'] = opdata['Time'].values.astype('datetime64[h]')
            elif ttype == 'MINUTE':
                opdata.loc[:, 'Time'] = opdata['Time'].values.astype('datetime64[m]')

            # Drop duplicates (happens with hourly aggregates sometimes...)
            opdata.drop_duplicates(['Time'], inplace=True)

            if start_dt_str:
                opdata = opdata.loc[opdata['Time'] >= start_dt, ]
            if end_dt_str:
                # strict < on (end + 1 day) keeps the whole end date
                opdata = opdata.loc[opdata['Time'] < end_dt + timedelta(days=1), ]

            # If returning all as a single dataframe, merge the result in loop
            # (or initialize dataframe)
            if combine_all:
                # Rename Value variable to its corresponding Sensor ID
                opdata.rename(columns={'Value': sid}, inplace=True)
                if not len(opdata_all):
                    opdata_all = opdata
                else:
                    opdata_all = opdata_all.merge(opdata[['Time', sid]], on='Time', how='outer')
            # Otherwise, load output to dictionary
            else:
                opdata_all['{0}_{1}_{2}_{3}_AVERAGES'.format(
                    stype, sid, tperiod, ttype)] = opdata
    finally:
        conn.close()

    if combine_all and output_csv:
        os.chdir(outdir)
        op_fname = '_'.join(sids + [str(tperiod) for tperiod in tperiods]) + '.csv'
        opdata_all.to_csv(op_fname, index=False, encoding='utf-8')

    return opdata_all
def __init__(self, start_dt_str, end_dt_str, ip_path):
    """Store the parsed date window, data directory, and input path for this run."""
    date_fmt = '%m-%d-%y'
    # Parse the bounds; the end is pushed forward one day so that the
    # final calendar day is fully inside the window
    self.start_dt = dt.strptime(start_dt_str, date_fmt)
    self.end_dt = dt.strptime(end_dt_str, date_fmt) + timedelta(days=1)
    # Data directory comes from the shared utils module
    self.data_dir = cut.get_dirs()[0]
    self.ip_path = ip_path