def generate_appliances_traces(schema, table, appliances, dataid,
                               sample_rate=None, verbose=True):
    '''
    Return a list of appliance traces by dataid. Each trace is in decimal
    form and in average Watts.

    schema      -- key used to look up the database schema name in
                   `schema_names` and the time column in `time_columns`.
    table       -- table name, interpolated into the query as-is.
    appliances  -- sequence of column names; it is both joined into the
                   select list and iterated, so it must be re-iterable.
                   One trace is produced per entry, in the same order.
    dataid      -- value placed in the `where dataid=...` clause.
    sample_rate -- when truthy, each trace is replaced by
                   `trace.resample(sample_rate)`.
    verbose     -- when truthy, the generated query is printed.

    Returns a list holding one trace per requested appliance.
    '''
    schema_name = schema_names[schema]
    time_column = time_columns[schema]

    # NOTE(review): the query is assembled by string formatting. Column,
    # schema and table identifiers cannot be bound as SQL parameters, so
    # callers must only pass trusted values here.
    query = 'select {0},{1} from "{2}".{3} where dataid={4}'.format(
        ','.join(appliances), time_column, schema_name, table, dataid)

    if verbose:
        # Parenthesised form prints the same text under Python 2 and is
        # valid syntax under Python 3.
        print(query)

    df = get_dataframe(query)
    df = df.rename(columns={time_column: 'time'})
    # Return value is not used, so this presumably indexes df in place
    # -- TODO confirm against utils.
    utils.create_datetimeindex(df)

    traces = []
    for appliance in appliances:
        # Missing readings are replaced by 0.
        series = pd.Series(df[appliance], name=appliance).fillna(0)
        metadata = {
            'source': source,
            'schema': schema,
            'table': table,
            'dataid': dataid,
            'device_name': series.name,
        }
        trace = ApplianceTrace(series, metadata)
        if sample_rate:
            trace = trace.resample(sample_rate)
        traces.append(trace)
    return traces
def generate_appliances_traces(schema, table, appliances, dataid,
                               sample_rate=None, verbose=True):
    '''
    Return a list of appliance traces by dataid. Each trace is in decimal
    form and in average Watts.

    The function queries the columns named in `appliances` (plus the time
    column registered for `schema` in `time_columns`) from `table` in the
    schema registered in `schema_names`, restricted to rows matching
    `dataid`. It then wraps each appliance column in an ApplianceTrace.

    `appliances` is joined into the select list and iterated afterwards,
    so it must be a re-iterable sequence of column names. When
    `sample_rate` is truthy every trace is resampled with it; when
    `verbose` is truthy the SQL text is printed before it is executed.
    '''
    time_col = time_columns[schema]

    # NOTE(review): identifiers and dataid are formatted straight into the
    # SQL text; only trusted values should reach this function.
    select_list = ','.join(appliances)
    query = 'select {0},{1} from "{2}".{3} where dataid={4}'.format(
        select_list, time_col, schema_names[schema], table, dataid)

    if verbose:
        # Was a Python 2 print statement; this form is accepted by both
        # Python 2 and Python 3 and emits the same text.
        print(query)

    frame = get_dataframe(query).rename(columns={time_col: 'time'})
    # The result is discarded, so the helper presumably sets the index on
    # `frame` in place -- TODO confirm.
    utils.create_datetimeindex(frame)

    # Fields shared by every trace; each trace gets its own copy below.
    shared_metadata = {
        'source': source,
        'schema': schema,
        'table': table,
        'dataid': dataid,
    }

    result = []
    for column in appliances:
        values = pd.Series(frame[column], name=column).fillna(0)
        trace = ApplianceTrace(
            values, dict(shared_metadata, device_name=values.name))
        result.append(trace.resample(sample_rate) if sample_rate else trace)
    return result
def clean_dataframe(df, schema, drop_cols):
    '''
    Cleans a dataframe queried directly from the database.

    The db time column for `schema` (ex. UTC_15MIN), as registered in
    `time_columns`, is renamed to 'time' and handed to
    `utils.create_datetimeindex` to build a DatetimeIndex.

    The 'dataid' column is always dropped. The 'id' column is dropped only
    when `schema` is 'curated'. Any columns listed in `drop_cols` are
    dropped as well; `drop_cols` may be empty or None, in which case no
    extra columns are removed.

    Returns the cleaned dataframe.
    '''
    # change the time column name
    df = df.rename(columns={time_columns[schema]: 'time'})

    # use a DatetimeIndex; the return value is unused, so this presumably
    # modifies df in place -- TODO confirm against utils.
    utils.create_datetimeindex(df)

    # drop unnecessary columns
    df = df.drop(['dataid'], axis=1)
    if schema == 'curated':
        df = df.drop(['id'], axis=1)

    # len() rather than truthiness so array-like column lists still work;
    # the None check avoids a TypeError when no extra columns are given.
    if drop_cols is not None and len(drop_cols) != 0:
        df = df.drop(drop_cols, axis=1)
    return df
def clean_dataframe(df, schema, drop_cols):
    '''
    Tidy up a dataframe that came straight from a database query.

    Steps, in order:
      1. Rename the schema's time column (looked up in `time_columns`,
         ex. UTC_15MIN) to 'time'.
      2. Call `utils.create_datetimeindex` on the renamed frame.
      3. Drop 'dataid'.
      4. Drop 'id', but only for the 'curated' schema.
      5. Drop every column named in `drop_cols`, unless it is None or
         empty.

    Returns the resulting dataframe.
    '''
    renamed = df.rename(columns={time_columns[schema]: 'time'})

    # Result is not captured; the helper is presumably in-place
    # -- TODO confirm.
    utils.create_datetimeindex(renamed)

    cleaned = renamed.drop(['dataid'], axis=1)
    if schema == 'curated':
        cleaned = cleaned.drop(['id'], axis=1)

    # Guard against None before calling len(); keep len() so that
    # array-like inputs are not evaluated for truthiness.
    nothing_extra = drop_cols is None or len(drop_cols) == 0
    if nothing_extra:
        return cleaned
    return cleaned.drop(drop_cols, axis=1)