def generate_appliances_traces(schema, table, appliances, dataid,
                               sample_rate=None, verbose=True):
    '''
    Build one ApplianceTrace per requested appliance column for a dataid.

    The columns named in `appliances`, plus the time column registered for
    `schema`, are selected from `table` for rows whose dataid equals
    `dataid`. The time column is renamed to 'time' and handed to
    utils.create_datetimeindex. Missing readings are filled with 0.

    If `verbose` is truthy the SQL text is printed before it is executed.
    If `sample_rate` is truthy every trace is resampled to it.

    Returns the traces as a list, in the same order as `appliances`.
    The original documentation states that each trace is in decimal form
    and in average Watts.
    '''
    schema_name = schema_names[schema]
    column_list = ','.join(appliances)
    time_column = time_columns[schema]
    query = 'select {0},{1} from "{2}".{3} where dataid={4}'.format(
        column_list, time_column, schema_name, table, dataid)
    if verbose:
        # Single-argument form prints identically as a statement or a call.
        print(query)

    df = get_dataframe(query)
    df = df.rename(columns={time_column: 'time'})
    # NOTE(review): presumably indexes df by its 'time' column in place;
    # the return value is not used here -- confirm against utils.
    utils.create_datetimeindex(df)

    traces = []
    for name in appliances:
        readings = pd.Series(df[name], name=name).fillna(0)
        trace = ApplianceTrace(readings, {
            'source': source,
            'schema': schema,
            'table': table,
            'dataid': dataid,
            'device_name': readings.name,
        })
        traces.append(trace.resample(sample_rate) if sample_rate else trace)
    return traces
def generate_traces_by_table_and_dataid(schema, table, dataid,
                                        sample_rate=None):
    '''
    Returns a list of traces, one per valid column of `table`, for the rows
    whose dataid equals `dataid`.

    The original documentation describes this as the traces for one house
    and one month (presumably one dataid per house and one table per
    month -- confirm).

    schema      -- one of 'curated', 'raw' or 'shared'.
    table       -- table name inside the schema.
    dataid      -- value matched against the dataid column.
    sample_rate -- if truthy, each trace is resampled to this rate before
                   being returned.

    Raises SchemaError if `schema` is not one of the accepted names.
    '''
    if schema not in ['curated', 'raw', 'shared']:
        raise SchemaError(schema)
    schema_name = schema_names[schema]
    query = 'select * from "{0}".{1} where dataid={2}'.format(
        schema_name, table, dataid)
    # TODO NEED TO CHANGE IDS
    # TODO error checking that query worked
    df = get_dataframe(query).fillna(0)
    df = clean_dataframe(df, schema, [])

    traces = []
    for col in df.columns:
        if col not in invalid_columns[schema]:
            s = pd.Series(df[col], name=col)
            meta = {
                'source': source,
                'schema': schema,
                'table': table,
                'dataid': dataid,
                'device_name': s.name,
            }
            trace = ApplianceTrace(s, meta)
            if sample_rate:
                trace = trace.resample(sample_rate)
            # Append the (possibly resampled) trace. Appending a freshly
            # built ApplianceTrace here would silently ignore sample_rate.
            traces.append(trace)
    return traces
def generate_appliances_traces(schema, table, appliances, dataid,
                               sample_rate=None, verbose=True):
    '''
    Return a list of appliance traces by dataid, one per entry of
    `appliances` and in the same order.

    Queries `table` for the requested appliance columns together with the
    schema's time column, keeping only rows whose dataid equals `dataid`.
    Gaps in each column are filled with 0 before the column is wrapped in
    an ApplianceTrace. The SQL text is printed first when `verbose` is
    truthy, and each trace is resampled when `sample_rate` is truthy.

    The original documentation states that each trace is in decimal form
    and in average Watts.
    '''
    schema_name = schema_names[schema]
    query = 'select {0},{1} from "{2}".{3} where dataid={4}'.format(
        ','.join(appliances),
        time_columns[schema],
        schema_name,
        table,
        dataid,
    )
    if verbose:
        # Single-argument form prints identically as a statement or a call.
        print(query)

    df = get_dataframe(query)
    df = df.rename(columns={time_columns[schema]: 'time'})
    utils.create_datetimeindex(df)

    def _trace_for(appliance):
        # Wrap one appliance column, resampling only when a rate was given.
        series = pd.Series(df[appliance], name=appliance).fillna(0)
        metadata = dict([
            ('source', source),
            ('schema', schema),
            ('table', table),
            ('dataid', dataid),
            ('device_name', series.name),
        ])
        trace = ApplianceTrace(series, metadata)
        if not sample_rate:
            return trace
        return trace.resample(sample_rate)

    return [_trace_for(appliance) for appliance in appliances]
def generate_traces_by_table_and_dataid(schema, table, dataid,
                                        sample_rate=None):
    '''
    Returns a list of traces built from every valid column of `table` for
    the rows whose dataid equals `dataid`.

    The original documentation describes the result as the traces for one
    house and one month (presumably dataid identifies the house and the
    table covers a month -- confirm).

    Columns listed in invalid_columns[schema] are skipped. When
    `sample_rate` is truthy each trace is resampled to that rate.

    Raises SchemaError when `schema` is not 'curated', 'raw' or 'shared'.
    '''
    if schema not in ['curated', 'raw', 'shared']:
        raise SchemaError(schema)
    schema_name = schema_names[schema]
    query = 'select * from "{0}".{1} where dataid={2}'.format(
        schema_name, table, dataid)
    # TODO NEED TO CHANGE IDS
    # TODO error checking that query worked
    df = get_dataframe(query).fillna(0)
    df = clean_dataframe(df, schema, [])

    traces = []
    for col in df.columns:
        if col in invalid_columns[schema]:
            continue
        s = pd.Series(df[col], name=col)
        meta = {
            'source': source,
            'schema': schema,
            'table': table,
            'dataid': dataid,
            'device_name': s.name,
        }
        trace = ApplianceTrace(s, meta)
        if sample_rate:
            trace = trace.resample(sample_rate)
        # Keep the resampled object: previously a brand-new, un-resampled
        # ApplianceTrace was appended and sample_rate had no effect.
        traces.append(trace)
    return traces
def get_trace(xml_string):
    '''
    Returns an ApplianceTrace representing the data in the XML file, which
    must conform to the GreenButtonXML format.

    The text of every 'value' element becomes a reading and the text of
    every 'start' element except the first becomes its timestamp. The
    second 'duration' element is read as the sampling interval in seconds;
    intervals of 3600, 1800, 900 and 60 seconds cause the trace to be
    resampled to 'H', '30T', '15T' and 'T' respectively. Any other
    interval leaves the trace unresampled.

    Raises ExpatError (after printing "XML parsing error") when
    `xml_string` is not well-formed XML.
    '''
    # if not _validate(xml_string):
    #     raise InvalidXMLError
    try:
        xmldoc = minidom.parseString(xml_string)
    except ExpatError:
        # Re-raise after reporting: carrying on would only fail later with
        # an unrelated UnboundLocalError on the names assigned below.
        print("XML parsing error")
        raise

    values = xmldoc.getElementsByTagName('value')
    datetimes = xmldoc.getElementsByTagName('start')
    # TODO - more intelligently handle assumption about duration -> freq
    frequency = int(xmldoc.getElementsByTagName('duration')[1]
                    .childNodes[0].nodeValue)
    # remove first extra 'start' time
    datetimes.pop(0)

    # extract values
    # NOTE(review): nodeValue is text, so the readings are stored as
    # strings -- confirm ApplianceTrace / resample handles that.
    values = [v.childNodes[0].nodeValue for v in values]
    # fromtimestamp() interprets the epoch seconds in the local timezone.
    datetimes = [datetime.fromtimestamp(int(dt.childNodes[0].nodeValue))
                 for dt in datetimes]

    series = pd.Series(values, index=datetimes)
    metadata = {'source': 'GreenButtonXML'}
    trace = ApplianceTrace(series, metadata)

    # TODO - be more flexible
    # set sample rate: interval in seconds -> resample rule
    resample_rules = {
        60 * 60: 'H',
        60 * 30: '30T',
        60 * 15: '15T',
        60: 'T',
    }
    rule = resample_rules.get(frequency)
    if rule is not None:
        trace = trace.resample(rule)
    return trace