Esempio n. 1
0
def generate_traces_by_table_and_dataid(schema,
                                        table,
                                        dataid,
                                        sample_rate=None):
    '''
    Returns a list of traces for one house and one month.

    Runs a ``select *`` against ``table`` (inside the database schema that
    ``schema_names`` maps ``schema`` to) filtered on ``dataid``, replaces
    missing values with 0, passes the frame through ``clean_dataframe`` and
    wraps every column not listed in ``invalid_columns[schema]`` in an
    ``ApplianceTrace``.

    schema      -- one of 'curated', 'raw' or 'shared'
    table       -- table name interpolated into the query
    dataid      -- value compared against the ``dataid`` column
    sample_rate -- optional; when truthy, each trace is replaced by
                   ``trace.resample(sample_rate)``

    Raises SchemaError when ``schema`` is not one of the accepted names.
    '''
    global schema_names, invalid_columns, source
    if schema not in ('curated', 'raw', 'shared'):
        raise SchemaError(schema)
    schema_name = schema_names[schema]
    query = 'select * from "{0}".{1} where dataid={2}'.format(
        schema_name, table, dataid)
    # TODO NEED TO CHANGE IDS
    # TODO error checking that query worked
    df = get_dataframe(query).fillna(0)
    df = clean_dataframe(df, schema, [])

    traces = []
    for col in df.columns:
        if col in invalid_columns[schema]:
            continue
        s = pd.Series(df[col], name=col)
        meta = {
            'source': source,
            'schema': schema,
            'table': table,
            'dataid': dataid,
            'device_name': s.name,
        }
        trace = ApplianceTrace(s, meta)
        if sample_rate:
            trace = trace.resample(sample_rate)
        # Append the (possibly resampled) trace itself. Building a second
        # ApplianceTrace here would silently drop the resampling.
        traces.append(trace)
    return traces
Esempio n. 2
0
def generate_appliances_traces(schema,
                               table,
                               appliances,
                               dataid,
                               sample_rate=None,
                               verbose=True):
    '''
    Return one ApplianceTrace per requested appliance for a given dataid.

    Selects the ``appliances`` columns plus the schema's time column from
    ``table``, renames the time column to 'time', hands the frame to
    ``utils.create_datetimeindex`` and wraps each appliance column (missing
    values filled with 0) in an ApplianceTrace.

    Per the original description each trace is in decimal form and in
    average Watts; nothing in this function converts units, so that depends
    on the stored data.

    schema      -- key into ``schema_names`` and ``time_columns``
    table       -- table name interpolated into the query
    appliances  -- iterable of column names; joined with ',' into the query
    dataid      -- value compared against the ``dataid`` column
    sample_rate -- optional; when truthy, each trace is resampled with it
    verbose     -- when truthy, the generated query is printed
    '''
    schema_name = schema_names[schema]
    selected = ','.join(appliances)
    time_column = time_columns[schema]
    query = 'select {0},{1} from "{2}".{3} where dataid={4}'.format(
        selected, time_column, schema_name, table, dataid)
    if verbose:
        # Parenthesised form prints the same text on Python 2.
        print(query)

    df = get_dataframe(query).rename(columns={time_column: 'time'})
    # Return value is ignored, so this presumably indexes df in place --
    # TODO confirm against utils.
    utils.create_datetimeindex(df)

    traces = []
    for name in appliances:
        readings = pd.Series(df[name], name=name).fillna(0)
        trace = ApplianceTrace(readings, {
            'source': source,
            'schema': schema,
            'table': table,
            'dataid': dataid,
            'device_name': readings.name,
        })
        traces.append(trace.resample(sample_rate) if sample_rate else trace)
    return traces
def generate_appliances_traces(
        schema, table, appliances, dataid, sample_rate=None, verbose=True):
    '''
    Return a list of appliance traces for ``dataid``, one per entry in
    ``appliances``.

    The query selects the appliance columns and the schema's time column
    from ``table``; the time column is renamed to 'time' before the frame is
    passed to ``utils.create_datetimeindex``. Missing readings are filled
    with 0. When ``sample_rate`` is truthy every trace is resampled with it;
    when ``verbose`` is truthy the query text is printed.

    The original note says each trace is in decimal form and in average
    Watts; this function performs no unit conversion itself.
    '''
    db_schema = schema_names[schema]
    column_list = ','.join(appliances)
    time_col = time_columns[schema]
    query = 'select {0},{1} from "{2}".{3} where dataid={4}'.format(
        column_list, time_col, db_schema, table, dataid)
    if verbose:
        # Same output as the Python 2 print statement for one argument.
        print(query)

    frame = get_dataframe(query)
    frame = frame.rename(columns={time_col: 'time'})
    # Result is discarded; presumably mutates ``frame`` -- TODO confirm.
    utils.create_datetimeindex(frame)

    def build(appliance):
        # One trace per appliance column, optionally resampled.
        column = pd.Series(frame[appliance], name=appliance).fillna(0)
        info = {
            'source': source,
            'schema': schema,
            'table': table,
            'dataid': dataid,
            'device_name': column.name,
        }
        built = ApplianceTrace(column, info)
        if sample_rate:
            built = built.resample(sample_rate)
        return built

    return [build(appliance) for appliance in appliances]
def generate_traces_by_table_and_dataid(schema, table, dataid, sample_rate=None):
    '''
    Returns a list of traces for one house and one month.

    Queries every column of ``table`` (in the database schema looked up in
    ``schema_names``) for the rows matching ``dataid``, fills missing values
    with 0, runs ``clean_dataframe`` and turns each column that is not in
    ``invalid_columns[schema]`` into an ``ApplianceTrace``.

    schema      -- one of 'curated', 'raw' or 'shared'
    table       -- table name interpolated into the query
    dataid      -- value compared against the ``dataid`` column
    sample_rate -- optional; when truthy, each trace is resampled with it

    Raises SchemaError when ``schema`` is not one of the accepted names.
    '''
    global schema_names, invalid_columns, source
    if schema not in ('curated', 'raw', 'shared'):
        raise SchemaError(schema)
    schema_name = schema_names[schema]
    query = 'select * from "{0}".{1} where dataid={2}'.format(
        schema_name, table, dataid)
    # TODO NEED TO CHANGE IDS
    # TODO error checking that query worked
    df = get_dataframe(query).fillna(0)
    df = clean_dataframe(df, schema, [])

    traces = []
    for col in df.columns:
        if col in invalid_columns[schema]:
            continue
        s = pd.Series(df[col], name=col)
        meta = {
            'source': source,
            'schema': schema,
            'table': table,
            'dataid': dataid,
            'device_name': s.name,
        }
        trace = ApplianceTrace(s, meta)
        if sample_rate:
            trace = trace.resample(sample_rate)
        # Keep the resampled trace; re-wrapping ``s`` here would throw the
        # resampling away.
        traces.append(trace)
    return traces
Esempio n. 5
0
def get_trace_from_intermediate_xml(xml_house):
    '''
        Builds an ApplianceTrace from a dict holding interval readings.

        Each entry of xml_house['interval_readings'] is indexed as a pair:
        element 0 becomes the series index, element 1 is looked up with
        'value' for the reading. xml_house['meta'] is passed through as the
        trace metadata.

        NOTE(review): the earlier docstring said an arbitrary dataid is
        assigned; nothing in this function does that.
    '''
    timestamps = []
    readings = []
    for entry in xml_house['interval_readings']:
        timestamps.append(entry[0])
    for entry in xml_house['interval_readings']:
        readings.append(entry[1]['value'])
    series = pd.Series(readings, index=timestamps)
    return ApplianceTrace(series, xml_house['meta'])
Esempio n. 6
0
def generate_trace(start,periods,freq):
    """
    Returns a randomly generated appliance trace for a particular time period.

    The index is a date range of ``periods`` steps of ``freq`` beginning at
    ``start``; the values are standard-normal samples shifted up by 1. The
    trace metadata is {"source": "generated"}.

    Ex) generate_trace(datetime.datetime(2013,1,1),96,'15T'))
    """
    index = pd.date_range(start, periods=periods, freq=freq)
    noise = np.random.randn(periods)
    shifted = noise + 1
    return ApplianceTrace(pd.Series(shifted, index=index),
                          {"source": "generated"})
Esempio n. 7
0
def generate_trace_by_dataid(homes,dataid):
    '''
    Returns the trace for the home stored under ``dataid`` in ``homes``.

    homes[dataid]['interval_readings'] entries are indexed as pairs: element
    0 is used as the index label and element 1 is looked up with 'value'.
    homes[dataid]['meta'] is used as the trace metadata.
    '''
    home = homes[dataid]
    readings = home['interval_readings']
    index = [reading[0] for reading in readings]
    data = [reading[1]['value'] for reading in readings]
    series = pd.Series(data, index=index)
    return ApplianceTrace(series, home['meta'])
Esempio n. 8
0
def generate_refrigerator_trace(start,periods,freq):
    """
    Intended to use hmm parameters learned from Pecan Street data to generate
    samples for a fairly normal cycling refrigerator (three state HMM).

    NOTE: no HMM sampling is implemented here yet. The returned trace holds
    ``periods`` zeros on a date range starting at ``start`` with step
    ``freq``, tagged with metadata {"source": "fhmm generated"}.

    start   -- first timestamp of the index
    periods -- number of samples to generate
    freq    -- pandas frequency string for the index spacing
    """
    # Keywords are required: the second positional parameter of
    # pd.date_range is ``end``, so passing (start, periods, freq)
    # positionally would treat ``periods`` as the end date.
    rng = pd.date_range(start, periods=periods, freq=freq)
    values = np.zeros(periods)
    series = pd.Series(values, index=rng)

    return ApplianceTrace(series, {"source": "fhmm generated"})
def get_trace(xml_string):
    '''
    Returns an ApplianceTrace representing the data in the XML file, which
    must conform to the GreenButtonXML format.

    Every <value> element supplies a reading and every <start> element except
    the first supplies its timestamp (parsed as integer epoch seconds). The
    second <duration> element is read as the sampling interval in seconds;
    intervals of 3600, 1800, 900 and 60 trigger a resample to 'H', '30T',
    '15T' and 'T' respectively, any other interval leaves the trace as is.

    Raises ExpatError (after printing a short message) when ``xml_string``
    is not well-formed XML.

    NOTE(review): readings are passed on as the text taken from the XML, and
    timestamps go through datetime.fromtimestamp, i.e. local time -- confirm
    that both are what downstream code expects.
    '''
    # if not _validate(xml_string):
    #     raise InvalidXMLError

    try:
        xmldoc = minidom.parseString(xml_string)
    except ExpatError:
        # Re-raise: carrying on without a parsed document only produced an
        # unrelated UnboundLocalError a few lines further down.
        print("XML parsing error")
        raise

    values = xmldoc.getElementsByTagName('value')
    datetimes = xmldoc.getElementsByTagName('start')
    # TODO - more intelligently handle assumption about duration -> freq
    frequency = int(xmldoc.getElementsByTagName('duration')[1]
                    .childNodes[0].nodeValue)
    # remove first extra 'start' time
    datetimes.pop(0)

    # extract values
    values = [v.childNodes[0].nodeValue for v in values]
    datetimes = [datetime.fromtimestamp(int(dt.childNodes[0].nodeValue))
                 for dt in datetimes]

    series = pd.Series(values, index=datetimes)
    metadata = {'source': 'GreenButtonXML'}
    trace = ApplianceTrace(series, metadata)

    # TODO - be more flexible
    # set sample rate: interval length in seconds -> pandas resample rule
    resample_rules = {
        60 * 60: 'H',
        60 * 30: '30T',
        60 * 15: '15T',
        60: 'T',
    }
    rule = resample_rules.get(frequency)
    if rule is not None:
        trace = trace.resample(rule)

    return trace