def process_data(tableName='DailyLogResponses'):
    """Pull daily-log responses from Google Sheets and upsert them into the
    local SQLite field-data database.

    The first cleaned column is used as the Timestamp primary key, and
    'INSERT OR REPLACE' makes repeated loads idempotent.

    :param tableName: gsheet worksheet name, also used as the SQLite table name
    """
    # Get the log data from gsheets
    logdata = cut.get_gsheet_data(tableName)
    # Eliminate special characters (':-?[]()') and replace spaces with '_'
    # so the column names are valid SQLite identifiers
    colnamesCln = [clean_varname(colname) for colname in logdata.columns.values]
    # Replace columns names of dataset with clean column names
    logdata.columns = colnamesCln

    # SQL command strings for sqlite3; the first column doubles as the key,
    # so the explicit column list skips it while the placeholder list covers
    # every column
    colnamesStr = ','.join(colnamesCln[1:])
    colInsStr = ','.join(['?'] * len(colnamesCln))
    create_str = """
		CREATE TABLE IF NOT EXISTS {0} (Timestamp CHAR PRIMARY KEY, {1})
	""".format(tableName, colnamesStr)
    ins_str = """
		INSERT OR REPLACE INTO {0} (Timestamp,{1})
		VALUES ({2})
	""".format(tableName, colnamesStr, colInsStr)

    # Change to data directory (the db file is addressed relative to it)
    data_dir = cut.get_dirs()[0]
    os.chdir(data_dir)
    # Create SQL object connection; close it even if the create/insert fails
    # (the original leaked the connection on error)
    conn = sqlite3.connect('cr2c_fielddata.db')
    try:
        # Create table if it doesn't exist
        conn.execute(create_str)
        # Insert aggregated values for the sid and time period
        conn.executemany(ins_str, logdata.to_records(index=False).tolist())
        conn.commit()
    finally:
        conn.close()
Beispiel #2
0
def get_data(varNames = None, start_dt_str = None, end_dt_str = None, output_csv = False, outdir = None):
	"""Read field data from the local SQLite database, optionally filtered to
	a variable subset and a date window, optionally dumped to csv.

	:param varNames: list of field variable names (upper-cased for the query)
	:param start_dt_str: inclusive start date, '%m-%d-%y'
	:param end_dt_str: inclusive end date, '%m-%d-%y'
	:param output_csv: if True, also write the result to csv in outdir
	:param outdir: directory for the csv output
	:return: pandas DataFrame of stacked field data
	"""
	# Convert date string inputs to dt variables
	if start_dt_str:
		start_dt = dt.strptime(start_dt_str, '%m-%d-%y')
	if end_dt_str:
		end_dt = dt.strptime(end_dt_str, '%m-%d-%y')

	tableNames = ['DailyLogResponses','DailyLogResponsesV2']

	# Load data from SQL
	data_dir = cut.get_dirs()[0]
	os.chdir(data_dir)
	conn = sqlite3.connect('cr2c_fielddata.db')

	if varNames:
		varNames = [varName.upper() for varName in varNames]
		varNamesAll = 'Timestamp,' + ','.join(varNames)
	else:
		varNamesAll = '*'

	fielddata = pd.DataFrame([])

	try:
		# Stack both response tables into a single frame
		for tableName in tableNames:
			fielddata = pd.concat(
				[
					fielddata,
					pd.read_sql(
						'SELECT {0} FROM {1}'.format(varNamesAll, tableName), 
						conn, 
						coerce_float = True
					)
				],
				axis = 0,
				join = 'outer'
			)
	finally:
		# Close the connection even if a read fails (the original leaked it)
		conn.close()

	# Dedupe data (some issue with duplicates)
	fielddata.drop_duplicates(inplace = True)
	# Convert Timestamp variable to a pd datetime and eliminate missing values
	fielddata['Timestamp'] = pd.to_datetime(fielddata['Timestamp'])
	fielddata.dropna(subset = ['Timestamp'], inplace = True)

	# BUG FIX: the date filters previously referenced a non-existent
	# 'Date_Time' column; the timestamp column in this dataset is 'Timestamp'
	if start_dt_str:
		fielddata = fielddata.loc[fielddata['Timestamp'] >= start_dt,:]
	if end_dt_str:
		# Pad by one day so the window includes the whole end date
		fielddata = fielddata.loc[fielddata['Timestamp'] <= end_dt + timedelta(days = 1),:]

	# Output csv if desired
	if output_csv:
		if varNames:
			op_fname = '{0}.csv'.format(','.join(varNames))
		else:
			op_fname = 'cr2c-fieldData.csv'
		os.chdir(outdir)
		fielddata.to_csv(op_fname, index = False, encoding = 'utf-8')

	return fielddata
Beispiel #3
0
	def __init__(self, verbose = False):
		"""Set up lab-data constants, file-date stamps and directory paths.

		:param verbose: if True, downstream methods emit progress output
		"""
		# Lab measurement types handled by this class
		self.ltype_list = [
			'PH','COD','TSS_VSS','ALKALINITY','VFA',
			'GASCOMP','AMMONIA','SULFATE','TKN','BOD'
		]
		# Earliest date for which lab data are considered feasible
		self.min_feas_dt = dt.strptime('6-1-16', '%m-%d-%y')
		# Stamp output files with today's date
		self.file_dt = dt.now()
		self.file_dt_str = self.file_dt.strftime('%m-%d-%y')
		# Project directories (data dir, python dir) and the log subfolder
		self.data_dir, self.pydir = cut.get_dirs()
		self.log_dir = os.path.join(self.data_dir, 'Logs')
		self.verbose = verbose
Beispiel #4
0
def get_table_names():
    """Return the names of all tables in the operational-data SQLite database.

    :return: list of table-name strings
    """
    # The opdata database lives in the project data directory
    data_dir = cut.get_dirs()[0]
    os.chdir(data_dir)
    # Create SQL database connection; close it even if the query fails
    # (the original never closed the connection)
    conn = sqlite3.connect('cr2c_opdata.db')
    try:
        cursor = conn.cursor()
        # sqlite_master lists every schema object; keep only the tables
        cursor.execute(""" SELECT name FROM sqlite_master WHERE type ='table'""")
        return [names[0] for names in cursor.fetchall()]
    finally:
        conn.close()
Beispiel #5
0
def get_data(
	ltypes, 
	start_dt_str = None, end_dt_str = None, output_csv = False, outdir = None
):
	"""Read lab data of the requested types from the local SQLite database.

	:param ltypes: iterable of lab data type names (e.g. 'PH', 'COD', 'TSS')
	:param start_dt_str: inclusive start date, '%m-%d-%y'
	:param end_dt_str: inclusive end date, '%m-%d-%y'
	:param output_csv: if True, also write each type's data to csv in outdir
	:param outdir: directory for the csv output
	:return: dict mapping ltype -> pandas DataFrame (long format)
	"""
	# Convert date string inputs to dt variables
	if start_dt_str:
		start_dt = dt.strptime(start_dt_str, '%m-%d-%y')
	if end_dt_str:
		end_dt = dt.strptime(end_dt_str, '%m-%d-%y')

	# Load data from SQL
	data_dir = cut.get_dirs()[0]
	os.chdir(data_dir)
	conn = sqlite3.connect('cr2c_labdata.db')

	# Loop through types of lab data types (ltypes)
	ldata_all = {}
	try:
		for ltype in ltypes:

			# Clean user input wrt TSS_VSS (both are stored in one table)
			if ltype.find('TSS') >= 0 or ltype.find('VSS') >= 0:
				ltype = 'TSS_VSS'

			ldata_long = pd.read_sql(
				'SELECT * FROM {0}'.format(ltype), 
				conn, 
				coerce_float = True
			)

			# Dedupe data (some issue with duplicates)
			ldata_long.drop_duplicates(inplace = True)
			# Convert Date_Time variable to a pd datetime and eliminate missing values
			ldata_long.loc[:,'Date_Time'] = pd.to_datetime(ldata_long['Date_Time'])
			ldata_long.dropna(subset = ['Date_Time'], inplace = True)
			# Drop the internal dedupe key, then filter to desired dates
			ldata_long.drop('DKey', axis = 1, inplace = True)
			if start_dt_str:
				ldata_long = ldata_long.loc[ldata_long['Date_Time'] >= start_dt,:]
			if end_dt_str:
				# Pad by one day so the window includes the whole end date
				ldata_long = ldata_long.loc[ldata_long['Date_Time'] <= end_dt + timedelta(days = 1),:]

			# Output csv if desired
			if output_csv:
				os.chdir(outdir)
				ldata_long.to_csv(ltype + '.csv', index = False, encoding = 'utf-8')

			# Write to dictionary
			ldata_all[ltype] = ldata_long
	finally:
		# Close the connection even on error (the original leaked it)
		conn.close()

	return ldata_all
Beispiel #6
0
import cr2c_fielddata as fld
import cr2c_validation as val
import cr2c_utils as cut

## Dash/Plotly
import dash
import dash_html_components as html
import dash_core_components as dcc
import plotly.graph_objs as go
from dash.dependencies import Input, Output, State, Event
from flask_caching import Cache

# Initialize dash app
app = dash.Dash(__name__)
# Initialize flask cache; cache files live alongside the project data
# directory under 'flask-cache'
cache_dir = os.path.join(cut.get_dirs()[0], 'flask-cache')
cacheConfig = {
    # Probably preferable to use redis in the future
    'CACHE_TYPE': 'filesystem',
    'CACHE_DIR': cache_dir
}
cache = Cache()
# NOTE(review): presumably the cache expiry (seconds) passed to memoized
# callbacks elsewhere — usage not visible here, confirm
timeout = 300
# Bind the cache to the underlying Flask server (must happen after app creation)
cache.init_app(app.server, config=cacheConfig)
# Callbacks reference components created dynamically, so suppress the
# missing-component validation errors
app.config['suppress_callback_exceptions'] = True
# Serve CSS/JS assets locally rather than from the Plotly CDN
app.css.config.serve_locally = True
app.scripts.config.serve_locally = True

#================= Create datetimea layout map from existing data =================#

lab_types = [
Beispiel #7
0
def get_data(stypes,
             sids,
             tperiods,
             ttypes,
             combine_all=True,
             year_sub=None,
             month_sub=None,
             start_dt_str=None,
             end_dt_str=None,
             output_csv=False,
             outdir=None):
    """Read aggregated operational sensor data from the local SQLite database.

    The stypes/sids/tperiods/ttypes arguments are parallel lists; each tuple
    identifies one <stype>_<sid>_<tperiod>_<ttype>_AVERAGES table.

    :param stypes: sensor types, one per sid
    :param sids: sensor ids
    :param tperiods: aggregation period lengths, one per sid
    :param ttypes: aggregation period units ('HOUR'/'MINUTE'), one per sid
    :param combine_all: if True, merge all series on 'Time' into one DataFrame;
        otherwise return a dict keyed by table name
    :param year_sub: optional year filter (added to the SQL WHERE clause)
    :param month_sub: optional month filter (added to the SQL WHERE clause)
    :param start_dt_str: inclusive start date, '%m-%d-%y'
    :param end_dt_str: inclusive end date, '%m-%d-%y'
    :param output_csv: if True (and combine_all), also write the result to csv
    :param outdir: directory for the csv output
    :return: pandas DataFrame (combine_all) or dict of DataFrames
    """
    # Convert date string inputs to dt variables; defaults span the full
    # operational record (5-10-17 through now)
    start_dt = dt.strptime('5-10-17', '%m-%d-%y')
    end_dt = dt.now()
    if start_dt_str:
        start_dt = dt.strptime(start_dt_str, '%m-%d-%y')
    if end_dt_str:
        end_dt = dt.strptime(end_dt_str, '%m-%d-%y')

    # Find Operational Data directory and change working directory
    data_dir = cut.get_dirs()[0]
    os.chdir(data_dir)

    # Initialize output data variable
    if combine_all:
        opdata_all = pd.DataFrame()
    else:
        opdata_all = {}

    # Manage data selection input (the four selector lists are parallel)
    nsids = len(sids)
    if nsids != len(stypes) or nsids != len(tperiods) or nsids != len(ttypes):
        print(
            'Error in cr2c_opdata: get_data: The lengths of the sids, stypes, tperiods and ttypes arguments must be equal'
        )
        sys.exit()

    # Manage month and year subset input (will be added to sqlite3 query string)
    sub_ins = ''
    if year_sub and month_sub:
        sub_ins = 'WHERE YEAR == {} AND Month == {}'.format(
            year_sub, month_sub)
    elif month_sub:
        sub_ins = 'WHERE Month == {}'.format(month_sub)
    elif year_sub:
        sub_ins = 'WHERE Year == {}'.format(year_sub)

    # Open the connection ONCE and guarantee it is closed (the original
    # opened a new, never-closed connection on every loop iteration)
    conn = sqlite3.connect('cr2c_opdata.db')
    try:
        for sid, stype, tperiod, ttype in zip(sids, stypes, tperiods, ttypes):

            sql_str = """
			SELECT distinct * FROM {0}_{1}_{2}_{3}_AVERAGES {4}
			order by Time 
		""".format(stype, sid, tperiod, ttype, sub_ins)

            # Read to pandas dataframe
            opdata = pd.read_sql(sql_str, conn, coerce_float=True)

            # Format the time variable, truncated to the aggregation unit
            opdata['Time'] = pd.to_datetime(opdata['Time'])
            if ttype == 'HOUR':
                opdata.loc[:,
                           'Time'] = opdata['Time'].values.astype('datetime64[h]')
            elif ttype == 'MINUTE':
                opdata.loc[:,
                           'Time'] = opdata['Time'].values.astype('datetime64[m]')

            # Drop duplicates (happens with hourly aggregates sometimes...)
            opdata.drop_duplicates(['Time'], inplace=True)

            if start_dt_str:
                opdata = opdata.loc[opdata['Time'] >= start_dt, ]
            if end_dt_str:
                opdata = opdata.loc[opdata['Time'] < end_dt + timedelta(days=1), ]

            # If returning all as a single dataframe, merge the result in loop
            # (or initialize dataframe)
            if combine_all:

                # Rename Value variable to its corresponding Sensor ID
                opdata.rename(columns={'Value': sid}, inplace=True)
                if not len(opdata_all):
                    opdata_all = opdata
                else:
                    opdata_all = opdata_all.merge(opdata[['Time', sid]],
                                                  on='Time',
                                                  how='outer')

            # Otherwise, load output to dictionary
            else:
                opdata_all['{0}_{1}_{2}_{3}_AVERAGES'.format(
                    stype, sid, tperiod, ttype)] = opdata
    finally:
        conn.close()

    if combine_all and output_csv:

        os.chdir(outdir)
        op_fname = '_'.join(sids + [str(tperiod)
                                    for tperiod in tperiods]) + '.csv'
        opdata_all.to_csv(op_fname, index=False, encoding='utf-8')

    return opdata_all
Beispiel #8
0
    def __init__(self, start_dt_str, end_dt_str, ip_path):
        """Record the reporting window and the input/output locations.

        :param start_dt_str: inclusive start date, '%m-%d-%y'
        :param end_dt_str: inclusive end date, '%m-%d-%y'
        :param ip_path: path to the input file
        """
        date_fmt = '%m-%d-%y'
        self.start_dt = dt.strptime(start_dt_str, date_fmt)
        # Pad the end by one day so the window covers the whole end date
        self.end_dt = dt.strptime(end_dt_str, date_fmt) + timedelta(days=1)
        # Project data directory comes first in the dirs tuple
        self.data_dir = cut.get_dirs()[0]
        self.ip_path = ip_path