Exemple #1
0
def add_temperature_long():
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql('SELECT DISTINCT Building_Number, ICAO FROM EUAS_monthly_weather', conn)
    conn.close()
    # print df.head()
    conn = uo.connect('interval_ion')
    with conn:
        ids_elec = pd.read_sql('SELECT * FROM electric_id', conn)['id']
        ids_gas = pd.read_sql('SELECT * FROM gas_id', conn)['id']
    df_id_elec = df[df['Building_Number'].isin(ids_elec)]
    df_id_gas = df[df['Building_Number'].isin(ids_gas)]
    stations = np.union1d(df_id_elec['ICAO'].unique(), df_id_gas['ICAO'].unique())
    with conn:
        df_id_elec.to_sql('electric_id_station', conn, if_exists='replace')
        df_id_gas.to_sql('gas_id_station', conn, if_exists='replace')
    conn_w = uo.connect('weather_hourly_utc')
    print len(stations)
    for s in stations:
        with conn_w:
            ds = pd.read_sql('SELECT * FROM {0} WHERE Timestamp between \'2011-09-01\' and \'2016-06-28\''.format(s), conn_w)
            print s
        with conn:
            ds.to_sql(s, conn, if_exists='replace')
    conn_w.close()
    conn.close()
Exemple #2
0
def compare_id():
    conn = uo.connect('interval_ion')
    with conn:
        id_elec = pd.read_sql('SELECT * FROM electric_id', conn)['id']
        id_gas = pd.read_sql('SELECT * FROM gas_id', conn)['id']
    conn2 = uo.connect('all')
    with conn2:
        id_gsalink = pd.read_sql(
            'SELECT * FROM EUAS_ecm WHERE high_level_ECM = \'GSALink\'',
            conn2)['Building_Number']
    print len(set(id_gsalink))
    print len(set(id_elec).intersection(set(id_gsalink)))
    print len(set(id_gas).intersection(set(id_gsalink)))
    print len(
        set(id_elec).intersection(set(id_gas)).intersection(set(id_gsalink)))
Exemple #3
0
def change_area():
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql(
            'SELECT Building_Number, [Gross_Sq.Ft] FROM EUAS_area', conn)
    df_max = df.groupby('Building_Number').max()
    df_min = df.groupby('Building_Number').min()
    df_all = pd.merge(df_max,
                      df_min,
                      how='inner',
                      left_index=True,
                      right_index=True,
                      suffixes=['_max', '_min'])
    df_all['diff'] = df_all['Gross_Sq.Ft_max'] - \
                     df_all['Gross_Sq.Ft_min']
    df_all['percent_diff'] = df_all.apply(
        lambda r: np.nan if r['Gross_Sq.Ft_max'] == 0 else
        (1 - r['Gross_Sq.Ft_min'] / r['Gross_Sq.Ft_max']) * 100,
        axis=1)
    df_large = df_all[df_all['percent_diff'] > 10]
    print len(df_large)
    df_large.drop('diff', axis=1, inplace=True)
    print df_large.head()
    df_large.to_csv(homedir + 'question/change_area.csv', index=True)
    return
Exemple #4
0
def facility_vs_building_set(s):
    print s
    conn = uo.connect('all')
    if s == 'AI':
        ids = gbs.get_cat_set(['A', 'I'], conn)
    elif s == 'covered':
        ids = gbs.get_covered_set()
        df_f = pd.read_csv(
            os.getcwd() +
            '/input/FY/covered/Covered_Facilities_All Energy mmBTUs_FY14_EISA07Sec432_input.csv'
        )
        facility_eisa = set(df_f['Facility_Number'].tolist())
        facility_eisa = [x[:8] for x in facility_eisa if type(x) != float]
    f_ids = [x for x in ids if '0000' in x]
    # for x in sorted(f_ids):
    #     print x
    b_ids = [x for x in ids if not '0000' in x]
    print 'total {0}, facility {1}, building {2}'.format(
        len(ids), len(f_ids), len(b_ids))
    bf_ids = ['{0}0000{1}'.format(x[:2], x[-2:]) for x in b_ids]
    print len(common)
    common = (set(bf_ids).intersection(f_ids))
    for y in common:
        print y
        print[x for x in b_ids if '{0}0000{1}'.format(x[:2], x[-2:]) == y]
    if s == 'covered':
        print 'eisa facility', len(facility_eisa)
        print 'common ids from eisa', len(
            set(f_ids).intersection(facility_eisa))
        print 'different ids from eisa', (set(f_ids).difference(facility_eisa))
        common = (set(f_ids).difference(facility_eisa))
        for y in common:
            print y
            print[x for x in b_ids if '{0}0000{1}'.format(x[:2], x[-2:]) == y]
Exemple #5
0
def missing_gsalink():
    conn1 = uo.connect('all')
    with conn1:
        df1 = pd.read_sql('SELECT DISTINCT Building_Number FROM EUAS_monthly',
                          conn1)
    conn2 = uo.connect('other_input')
    with conn2:
        df2 = pd.read_sql(
            'SELECT DISTINCT Building_ID AS Building_Number FROM GSAlink_Buildings_First_55_Opiton_26_Start_Stop_Dates',
            conn2)
    # print len(df2)
    missing = set(df2['Building_Number'].tolist()).difference(
        df1['Building_Number'].tolist())
    print len(missing)
    print
    print missing
Exemple #6
0
def remove_outliers(measure_type):
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    col = value_lb_dict[measure_type]
    conn = uo.connect('interval_ion')
    dfs = []
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    # bs_pair = [x for x in bs_pair if x[0] == 'AL0039AB']
    for i, (b, s) in enumerate(bs_pair):
        print i, b
        with conn:
            df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, b), conn)
        # df = df.head(n = 5000)
        # df.info()
        points = df[col]
        outliers = show_outlier(points, b, 'upper', measure_type, 1.5)
        # outliers = show_outlier(points, b, 'pos_roll', measure_type, 1.5)
        # mild, outliers = show_outlier(points, b, 'box', measure_type, 1.5)
        df['outlier'] = outliers
        print len([x for x in outliers if x])
        dfs.append(df)
    df_all = pd.concat(dfs, ignore_index=True)
    print df_all.head()
    with conn:
        df_all.to_sql('{0}_outlier_tag'.format(measure_type),
                      conn, if_exists='replace')
    return
Exemple #7
0
def summary_long(measure_type):
    conn = uo.connect('interval_ion')
    with conn:
        df_id = pd.read_sql('SELECT * FROM {0}_id'.format(measure_type), conn)
    lines = []
    lines.append('Building_Number,neg_count,min,max,median,75percent,min_time,max_time,expect_count,actural_count,missing_count')
    ids = df_id['id'].tolist()
    for name in ids:
        with conn:
            df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, name), conn)
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
        cols = list(df)
        cols.remove('Timestamp')
        cols.remove('Building_Number')
        c = cols[0]
        temp = df.dropna(subset=[c])
        neg = temp[c].tolist()
        neg = [x for x in neg if x < 0]
        min_time = temp['Timestamp'].min()
        min_time_str = min_time.strftime('%Y-%m-%d %H:%M:%S')
        max_time = temp['Timestamp'].max()
        max_time_str = max_time.strftime('%Y-%m-%d %H:%M:%S')
        expect_count = int(pd.to_timedelta((max_time - min_time)) / np.timedelta64(15,'m')) + 1
        actural_count = len(temp)
        missing_count = expect_count - actural_count
        mini = temp[c].min()
        maxi = temp[c].max()
        median = temp[c].median()
        q3 = temp[c].quantile(0.75)
        print (','.join([name, str(int(len(neg))), str(mini), str(maxi), str(median), str(q3), min_time_str, max_time_str, str(expect_count), str(actural_count), str(missing_count)]))
        lines.append(','.join([name, str(int(len(neg))), str(mini), str(maxi), str(median), str(q3), min_time_str, max_time_str, str(expect_count), str(actural_count), str(missing_count)]))
    indir = os.getcwd() + '/input/FY/interval/ion_0627/summary_long/'
    with open (indir + 'summary_{0}.csv'.format(measure_type), 'w+') as wt:
        wt.write('\n'.join(lines))
def remove_outliers(measure_type):
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    col = value_lb_dict[measure_type]
    conn = uo.connect('interval_ion')
    dfs = []
    with conn:
        # df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
        df_id = pd.read_sql('SELECT * FROM {0}_id'.format(measure_type), conn)
    # bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    # bs_pair = [x for x in bs_pair if x[0] == 'AL0039AB']
    ids = df_id['id']
    for i, b in enumerate(ids):
        print i, b
        with conn:
            df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, b), conn)
        print df.head()
        # df = df.head(n = 5000)
        # df.info()
        points = df[col]
        outliers = show_outlier(points, b, 'upper', measure_type, 1.5)
        # outliers = show_outlier(points, b, 'pos_roll', measure_type, 1.5)
        # mild, outliers = show_outlier(points, b, 'box', measure_type, 1.5)
        df['outlier'] = outliers
        print len([x for x in outliers if x])
        dfs.append(df)
    df_all = pd.concat(dfs, ignore_index=True)
    print df_all.head()
    with conn:
        df_all.to_sql('{0}_outlier_tag'.format(measure_type),
                      conn, if_exists='replace')
    return
def build_energy_temperature(measure_type):
    conn = uo.connect('interval_ion')
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
        df_area = pd.read_sql('SELECT * FROM area', conn)
        df_tz = pd.read_sql('SELECT Building_Number, rawOffset FROM EUAS_timezone', conn)
    df_tz.set_index('Building_Number', inplace=True)
    df_area.set_index('Building_Number', inplace=True)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    multiplier_dict = {'electric':  3.412, 'gas': 1.026}
    col = value_lb_dict[measure_type]
    m = multiplier_dict[measure_type]
    dfs = []
    # bs_pair = [x for x in bs_pair if x[0] == 'OR0033PE']
    for b, s in bs_pair:
        print b, s
        try:
            area = df_area.ix[b, 'Gross_Sq.Ft']
        except KeyError:
            print 'No area found'
            continue
        offset = df_tz.loc[b, 'rawOffset']
        df = join_interval(offset, b, s, area, col, m, measure_type, conn)
        if len(df) == 0:
            continue
        dfs.append(df)
        df['Building_Number'] = b
    df_all = pd.concat(dfs)
    with conn:
        df_all.to_sql('{0}_wtemp'.format(measure_type),
                      conn, if_exists='replace')
    conn.close()
    print 'end'
Exemple #10
0
def facility_vs_building_set(s):
    print s
    conn = uo.connect('all')
    if s == 'AI':
        ids = gbs.get_cat_set(['A', 'I'], conn)
    elif s == 'covered':
        ids = gbs.get_covered_set()
        df_f = pd.read_csv(os.getcwd() + '/input/FY/covered/Covered_Facilities_All Energy mmBTUs_FY14_EISA07Sec432_input.csv')
        facility_eisa = set(df_f['Facility_Number'].tolist())
        facility_eisa = [x[:8] for x in facility_eisa if type(x) != float]
    f_ids = [x for x in ids if '0000' in x]
    # for x in sorted(f_ids):
    #     print x
    b_ids = [x for x in ids if not '0000' in x]
    print 'total {0}, facility {1}, building {2}'.format(len(ids), len(f_ids), len(b_ids))
    bf_ids = ['{0}0000{1}'.format(x[:2], x[-2:]) for x in b_ids]
    print len(common)
    common = (set(bf_ids).intersection(f_ids))
    for y in common:
        print y
        print [x for x in b_ids if '{0}0000{1}'.format(x[:2], x[-2:]) == y]
    if s == 'covered':
        print 'eisa facility', len(facility_eisa)
        print 'common ids from eisa', len(set(f_ids).intersection(facility_eisa))
        print 'different ids from eisa', (set(f_ids).difference(facility_eisa))
        common = (set(f_ids).difference(facility_eisa))
        for y in common:
            print y
            print [x for x in b_ids if '{0}0000{1}'.format(x[:2], x[-2:]) == y]
Exemple #11
0
def check_interval(filename):
    df = pd.read_csv(inputdir + filename)
    df.rename(columns=lambda x: x[:8] if x != 'Timestamp' else x,
              inplace=True)
    df.dropna(axis=1, how='all', inplace=True)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df.set_index(pd.DatetimeIndex(df['Timestamp']), inplace=True)
    # df.info()
    df_re = df.resample('M', how='sum')
    cols = list(df_re)
    df_re.reset_index(inplace=True)
    df_long = pd.melt(df_re, id_vars='index', value_vars=cols)
    # print
    # print df_long.head()
    df_long.rename(columns={'index':'Timestamp', 'variable': 'Building_Number', 'value': 'Electricity_(KWH)'}, inplace=True)
    df_long['month'] = df_long['Timestamp'].map(lambda x: x.month)
    df_long['year'] = df_long['Timestamp'].map(lambda x: x.year)
    col_str = ','.join(['\'{0}\''.format(x) for x in cols])
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql('SELECT Building_Number, year, month, [Electricity_(KWH)] FROM EUAS_monthly WHERE Building_Number IN ({0}) AND year = \'2015\''.format(col_str), conn)
    # print df.head()
    df_long.drop('Timestamp', axis=1, inplace=True)
    df_all = pd.merge(df, df_long, how='left', on=['Building_Number', 'year', 'month'], suffixes=['_EUAS', '_ION'])
    df_all['ratio'] = df_all['Electricity_(KWH)_ION']/df_all['Electricity_(KWH)_EUAS'].map(lambda x: round(x, 3))
    df_all['percent_diff'] = df_all['ratio'].map(lambda x: abs(1 - x) * 100.0)
    # print df_all.head()
    return df_all
Exemple #12
0
def summary_wide(measure_type):
    conn = uo.connect('interval_ion_single')
    with conn:
        df = pd.read_sql('SELECT * FROM {0}'.format(measure_type),
                         conn)
    conn.close()
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    cols = list(df)
    cols.remove('Timestamp')
    lines = []
    lines.append('Building_Number,neg_count,min,max,median,min_time,max_time,expect_count,actural_count,missing_count')
    for c in cols:
        temp = df[['Timestamp', c]].copy()
        temp.dropna(subset=[c], inplace=True)
        neg = temp[c].tolist()
        neg = [x for x in neg if x < 0]
        min_time = temp['Timestamp'].min()
        min_time_str = min_time.strftime('%Y-%m-%d %H:%M:%S')
        max_time = temp['Timestamp'].max()
        max_time_str = max_time.strftime('%Y-%m-%d %H:%M:%S')
        expect_count = int(pd.to_timedelta((max_time - min_time)) / np.timedelta64(15,'m')) + 1
        actural_count = len(temp)
        missing_count = expect_count - actural_count
        mini = temp[c].min()
        maxi = temp[c].max()
        median = temp[c].median()
        print (','.join([c, str(int(len(neg))), str(mini), str(maxi), str(median), min_time_str, max_time_str, str(expect_count), str(actural_count), str(missing_count)]))
        lines.append(','.join([c, str(int(len(neg))), str(mini), str(maxi), str(median), min_time_str, max_time_str, str(expect_count), str(actural_count), str(missing_count)]))
    indir = os.getcwd() + '/input/FY/interval/ion_0627/summary/'
    with open (indir + 'summary_{0}.csv'.format(measure_type), 'w+') as wt:
        wt.write('\n'.join(lines))
Exemple #13
0
def compare_interval_db(b):
    conn = uo.connect('ION data.db')
    with conn:
        df = read_sql(
            'SELECT * FROM ION_electricity WHERE Building_Number = \'{0}\''.
            format(b))
    df.info()
Exemple #14
0
def read_ion(measure_type, prefix, value_label):
    indir = os.getcwd() + '/input/FY/interval/ion_0627/{0}/'.format(measure_type)
    df1 = read_by_region(indir, prefix + ' 1_2_3_4*', value_label,
                         True)
    # temp = pd.to_datetime(df_melt['Timestamp'][:5])
    df2 = read_by_region(indir, prefix + ' 5_6*', value_label, True)
    # print len(df2), '222'
    df3 = read_by_region(indir, prefix + ' 7_to_11*', value_label,
                         True)
    # print len(df3), '333'
    df_melt = pd.concat([df1, df2, df3], ignore_index=True)
    print len(df_melt)
    df_melt.dropna(subset=[value_label], inplace=True)
    ids = df_melt['Building_Number'].unique()
    df_id = pd.DataFrame({'id': ids})
    print len(df_melt)
    conn = uo.connect('interval_ion')
    with conn:
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS {0}".format(measure_type))
        df_melt.to_sql(measure_type, conn, if_exist='replace')
        df_id.to_sql('{0}_id'.format(measure_type), conn,
                     if_exist='replace')
    conn.close()
    print 'end'
Exemple #15
0
def table_for_robust_set():
    conn = uo.connect('all')
    study_set = gbs.get_energy_set('eui').intersection(gbs.get_cat_set(['A', 'I'], conn))
    df = pd.read_csv(os.getcwd() + '/plot_FY_weather/html/table/action_saving.csv')
    df = df[df['Building_Number'].isin(study_set)]
    df.sort('Building_Number', inplace=True)
    df.to_csv(os.getcwd() + '/plot_FY_weather/html/table/action_saving_robustset.csv', index=False)
    return
Exemple #16
0
def check_hourly(b, measure_type):
    conn = uo.connect('interval_ion')
    euas_dict = {'electric': 'Electricity_(KWH)', 'gas': 'Gas_(Cubic_Ft)'}
    ion_dict = {'electric': 'Electric_(KWH)', 'gas':'Gas_(CubicFeet)'}
    with conn:
        df1 = pd.read_sql('SELECT * FROM {1} WHERE Building_Number = \'{0}\''.format(b, measure_type), conn)
    conn.close()
    df1['Date'] = pd.DatetimeIndex(pd.to_datetime(df1['Timestamp']))
    df1.set_index(df1['Date'], inplace=True)
    df1_re = df1.resample('M', 'sum')
    df1_re['month'] = df1_re.index.month
    df1_re['year'] = df1_re.index.year
    df1_re.reset_index(inplace=True)
    conn = uo.connect('all')
    with conn:
        df2 = pd.read_sql('SELECT Building_Number, year, month, [{1}] FROM EUAS_monthly WHERE Building_Number = \'{0}\''.format(b, euas_dict[measure_type]), conn)
    if len(df1) == 0 or len(df2) == 0:
        return
    df_all = pd.merge(df1_re, df2, on=['year', 'month'], how='left')
    df_all.set_index(pd.DatetimeIndex(pd.to_datetime(df_all['Date'])), inplace=True)
    df_all.drop('Date', axis=1, inplace=True)
    df_all.rename(columns={ion_dict[measure_type]: 'ION', euas_dict[measure_type]: 'EUAS'}, inplace=True)

    df_inn = pd.merge(df1_re, df2, on=['year', 'month'], how='inner')
    df_inn.set_index(pd.DatetimeIndex(pd.to_datetime(df_inn['Date'])), inplace=True)
    df_inn.drop('Date', axis=1, inplace=True)
    df_inn.rename(columns={ion_dict[measure_type]: 'ION', euas_dict[measure_type]: 'EUAS'}, inplace=True)
    df_inn[b] = df_inn['ION']/df_inn['EUAS']
    # df_inn.to_csv(homedir + 'temp/{0}_{1}_ion_euas.csv'.format(b, measure_type)) # temp check the data
    dsc = df_inn[[b]].describe().transpose()
    dsc['overall'] = df_inn['ION'].sum()/df_inn['EUAS'].sum()
    sns.set_context("talk", font_scale=1.0)
    sns.set_palette(sns.color_palette('Set2'))
    line1, = plt.plot(df_all.index, df_all['ION'], '-o')
    line2, = plt.plot(df_all.index, df_all['EUAS'], '-o')
    plt.legend([line1, line2], ['ION', 'EUAS'], loc='center left', bbox_to_anchor=(1, 0.5))
    plt.title('{0} {1} ION vs EUAS monthly'.format(b, measure_type), fontsize=30)
    if measure_type == 'electric':
        plt.ylabel('KWH')
    else:
        plt.ylabel('Cubic Feet')
    # plt.show()
    path = os.getcwd() + '/input/FY/interval/ion_0627/cmp_euas/{0}_{1}.png'.format(b, measure_type)
    P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight')
    plt.close()
    return dsc
def fit_time(measure_type, occtime, season=None):
    conn = uo.connect('interval_ion')
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
        df_area = pd.read_sql('SELECT * FROM area', conn)
        df_tz = pd.read_sql('SELECT Building_Number, rawOffset FROM EUAS_timezone', conn)
    df_tz.set_index('Building_Number', inplace=True)
    df_area.set_index('Building_Number', inplace=True)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    sns.set_style("whitegrid")
    sns.set_context("talk", font_scale=1)
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    multiplier_dict = {'electric':  3.412, 'gas': 1.026}
    col = value_lb_dict[measure_type]
    m = multiplier_dict[measure_type]
    ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'}
    print len(bs_pair)
    sns.set_style("whitegrid")
    # palette = sns.cubehelix_palette(len(bs_pair))
    palette = sns.color_palette('husl', len(bs_pair))
    sns.set_palette(palette)
    colors_rgb = [util.float2hex(x) for x in palette]
    sns.set_context("talk", font_scale=1)
    jsondir = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/json_{0}/'.format(occtime)
    # csvdir = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/csv/'
    for i, (b, s) in enumerate(bs_pair):
        print b, s
        try:
            area = df_area.ix[b, 'Gross_Sq.Ft']
        except KeyError:
            print 'No area found'
            continue
        offset = df_tz.loc[b, 'rawOffset']
        df = join_interval(offset, b, s, area, col, m, measure_type, conn, season=season)
        df.to_csv(homedir + 'temp/{0}.csv'.format(b))
        df = df[df[col] >= 0]
        points = df[col]
        outliers = show_outlier(points, b, 'upper', measure_type, 1.5)
        df['outlier'] = outliers
        df = df[~np.array(outliers)]
        df['status_week_day_night'] = \
            df.apply(lambda r: util.get_status(r['hour'], r['day']), axis=1)
        min_time = df['Timestamp'].min()
        max_time = df['Timestamp'].max()
        gr = df.groupby('status_week_day_night')
        bx = plt.axes()
        d0 = plot_piece(gr, bx, occtime, colors_rgb[i], measure_type, b, s, scatter=False, annote=True, jsondir=jsondir, season=season)
    plt.xlabel('Temperature_F')
    # plt.show()
    if season is None:
        path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/{0}_{1}.png'.format(measure_type, occtime)
    else:
        path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/{0}_{1}_{2}.png'.format(measure_type, occtime, season)
    P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight')
    shutil.copy(path, path.replace('input/FY/interval/ion_0627/piecewise_all', 'plot_FY_weather/html/interval/lean/all'))
    plt.close()
    return
Exemple #18
0
def ecm_program_no_date():
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql('SELECT * FROM EUAS_ecm_program', conn)
    df = df[df['ECM_program'].notnull()]
    df.drop_duplicates(cols=['Building_Number', 'ECM_program'], inplace=True)
    df.rename(columns={'ECM_program': 'energy_program'}, inplace=True)
    df.to_csv(homedir + 'question/program_date.csv', index=False)
    return
Exemple #19
0
def ecm_program_no_date():
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql('SELECT * FROM EUAS_ecm_program', conn)
    df = df[df['ECM_program'].notnull()]
    df.drop_duplicates(cols=['Building_Number', 'ECM_program'], inplace=True)
    df.rename(columns={'ECM_program': 'energy_program'}, inplace=True)
    df.to_csv(homedir + 'question/program_date.csv', index=False)
    return
Exemple #20
0
def plot_trend_fromdb(b, s, breakpoints):
    conn = uo.connect('all')
    with conn:
        df_all = pd.read_sql(
            'SELECT * FROM EUAS_monthly_weather WHERE Building_Number = \'{0}\''
            .format(b), conn)
    conn.close()
    df_all.sort(['year', 'month'], inplace=True)
    df_all = df_all[['year', 'month', 'eui_elec', 'eui_gas', 'hdd65', 'cdd65']]
    df_all = df_all[(df_all['year'] < 2016) & (df_all['year'] > 2002)]
    print type(df_all['eui_gas'].tolist()[0])
    byyear = df_all.groupby(['year']).sum()
    byyear.index = byyear.index.map(lambda x: datetime.datetime(int(x), 1, 1))
    sns.set_style("whitegrid")
    sns.set_context("talk", font_scale=1)
    bx = plt.axes()
    gas_line_color = '#DE4A50'
    elec_line_color = '#429CD5'
    hdd = byyear['hdd65'].tolist()
    hdd = [int(round(x, 0)) for x in hdd]
    cdd = byyear['cdd65'].tolist()
    cdd = [int(round(x, 0)) for x in cdd]
    bx = plt.axes()
    line1, = plt.plot(byyear.index,
                      byyear['eui_gas'],
                      ls='-',
                      lw=2,
                      marker='o',
                      color=gas_line_color)
    line2, = plt.plot(byyear.index,
                      byyear['eui_elec'],
                      ls='-',
                      lw=2,
                      marker='o',
                      color=elec_line_color)
    for m, n, d in zip(byyear.index, byyear['eui_gas'], hdd):
        bx.annotate('HDD\n{0}'.format(d), xy=(m, n))
    for m, n, d in zip(byyear.index, byyear['eui_elec'], cdd):
        bx.annotate('CDD\n{0}'.format(d), xy=(m, n))
    for bp in breakpoints:
        x = pd.to_datetime([pd.to_datetime(bp)] * 2)
        y = bx.get_ylim()
        plt.plot(x, y, ls='--', lw=2, color='gray')
    plt.legend([line1, line2], ['Gas', 'Electric'],
               loc='center left',
               bbox_to_anchor=(1, 0.5),
               prop={'size': 10})
    plt.title("Electric EUI and Gas EUI Trend")
    plt.ylabel("[kBtu/sq.ft/year]")
    plt.xlabel("Calendar Year")
    P.savefig(
        os.getcwd() +
        '/plot_FY_weather/html/single_building/trend/{0}_{1}_year.png'.format(
            b, s),
        dpi=70)
    plt.close()
    return
Exemple #21
0
def add_area():
    conn = uo.connect('all')
    with conn:
        df_area = pd.read_sql('SELECT DISTINCT Building_Number, [Gross_Sq.Ft] FROM EUAS_area WHERE Fiscal_Year > 2010', conn)
    conn.close()
    conn = uo.connect('interval_ion')
    with conn:
        ids_elec = pd.read_sql('SELECT * FROM electric_id', conn)['id']
        ids_gas = pd.read_sql('SELECT * FROM gas_id', conn)['id']
    ids = np.union1d(ids_elec, ids_gas)
    df_area = df_area[df_area['Building_Number'].isin(ids)]
    print len(df_area)
    print df_area.head(n = 20)
    change_area = df_area.groupby('Building_Number').filter(lambda x: len(x) > 2)
    assert(len(change_area) == 0)
    with conn:
        df_area.to_sql('area', conn, if_exists='replace')
    conn.close()
Exemple #22
0
def count_db_entries(db):
    conn = uo.connect(db)
    c = conn.cursor()
    tables = util.get_list_tables(c)
    acc = 0
    for t in tables:
        c.execute("SELECT COUNT (*) FROM {0};".format(t))
        acc += c.fetchone()[0]
    print acc
    return
Exemple #23
0
def check_0711():
    conn = uo.connect('interval_ion')
    # check_match(conn, 'electric')
    check_match(conn, 'gas')
    conn.close()
    # keys = ['mean', 'std', 'min', '25%', '50%', '75%', 'max', 'overall']
    # format_dict = {k: lambda x: '{0:.2f}'.format(x) for k in keys}
    # uo.csv2html(os.getcwd() + '/input/FY/interval/ion_0627/cmp_euas/electric_ratio.csv', {'Unnamed: 0': 'Building_Number'}, format_dict)
    # uo.csv2html(os.getcwd() + '/input/FY/interval/ion_0627/cmp_euas/gas_ratio.csv', {'Unnamed: 0': 'Building_Number'}, format_dict)
    return
Exemple #24
0
def num_invest():
    conn = uo.connect('all')
    studyset = gbs.get_650_set(conn)
    df1 = pd.DataFrame({'Building_Number': list(studyset)})
    with conn:
        df2 = pd.read_sql('SELECT * FROM EUAS_invest_nona', conn)
    df = pd.merge(df1, df2, on='Building_Number', how='left')
    df.groupby('Building_Number').count().to_csv(homedir +
                                                 'temp/invest_cnt_all.csv')
    cnt = df.groupby('Building_Number').count()
    print cnt.groupby('investment').count()
Exemple #25
0
def missing_area():
    conn = uo.connect('interval_ion')
    with conn:
        df_area = pd.read_sql('SELECT * FROM area', conn)
        id_elec = pd.read_sql('SELECT id FROM electric_id', conn)
        id_gas = pd.read_sql('SELECT id FROM gas_id', conn)
    ids = np.union1d(id_elec['id'], id_gas['id'])
    missing = np.setdiff1d(ids, df_area['Building_Number'])
    print
    for x in missing:
        print x
Exemple #26
0
def missing_area():
    conn = uo.connect('interval_ion')
    with conn:
        df_area = pd.read_sql('SELECT * FROM area', conn)
        id_elec = pd.read_sql('SELECT id FROM electric_id', conn)
        id_gas = pd.read_sql('SELECT id FROM gas_id', conn)
    ids = np.union1d(id_elec['id'], id_gas['id'])
    missing = np.setdiff1d(ids, df_area['Building_Number'])
    print
    for x in missing:
        print x
Exemple #27
0
def table_for_robust_set():
    conn = uo.connect('all')
    study_set = gbs.get_energy_set('eui').intersection(
        gbs.get_cat_set(['A', 'I'], conn))
    df = pd.read_csv(os.getcwd() +
                     '/plot_FY_weather/html/table/action_saving.csv')
    df = df[df['Building_Number'].isin(study_set)]
    df.sort('Building_Number', inplace=True)
    df.to_csv(os.getcwd() +
              '/plot_FY_weather/html/table/action_saving_robustset.csv',
              index=False)
    return
Exemple #28
0
def fit_time(measure_type, occtime):
    conn = uo.connect('interval_ion')
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
        df_area = pd.read_sql('SELECT * FROM area', conn)
    df_area.set_index('Building_Number', inplace=True)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    sns.set_style("whitegrid")
    sns.set_context("talk", font_scale=1)
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    multiplier_dict = {'electric':  3.412, 'gas': 1.026}
    col = value_lb_dict[measure_type]
    m = multiplier_dict[measure_type]
    ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'}
    print len(bs_pair)
    sns.set_style("whitegrid")
    # palette = sns.cubehelix_palette(len(bs_pair))
    palette = sns.color_palette('husl', len(bs_pair))
    sns.set_palette(palette)
    colors_rgb = [util.float2hex(x) for x in palette]
    sns.set_context("talk", font_scale=1)
    jsondir = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/json_{0}/'.format(occtime)
    # csvdir = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/csv/'
    for i, (b, s) in enumerate(bs_pair)[:5]:
        print b, s
        try:
            area = df_area.ix[b, 'Gross_Sq.Ft']
        except KeyError:
            print 'No area found'
            continue
        df = join_interval(b, s, area, col, m, measure_type, conn)
        # df.to_csv(homedir + 'temp/{0}.csv'.format(b))
        df = df[df[col] >= 0]
        points = df[col]
        outliers = show_outlier(points, b, 'upper', measure_type, 5)
        df['outlier'] = outliers
        df = df[~np.array(outliers)]
        df['status_week_day_night'] = \
            df.apply(lambda r: util.get_status(r['hour'], r['day']), axis=1)
        min_time = df['Timestamp'].min()
        max_time = df['Timestamp'].max()
        gr = df.groupby('status_week_day_night')
        bx = plt.axes()
        d0 = plot_piece(gr, bx, occtime, colors_rgb[i], measure_type, b, s, scatter=False, annote=True, jsondir=jsondir)
    plt.xlabel('Temperature_F')
    # plt.show()
    path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/{0}_{1}.png'.format(measure_type, occtime)
    P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight')
    shutil.copy(path, path.replace('input/FY/interval/ion_0627/piecewise_all', 'plot_FY_weather/html/interval/lean/all'))
    plt.close()
    return
Exemple #29
0
def add_temperature_wide():
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql('SELECT DISTINCT Building_Number, ICAO FROM EUAS_monthly_weather', conn)
    conn.close()
    # print df.head()
    conn = uo.connect('interval_ion_single')
    with conn:
        df_elec = pd.read_sql('SELECT * FROM electric', conn)
        df_gas = pd.read_sql('SELECT * FROM gas', conn)
    ids_elec = list(df_elec)
    ids_gas = list(df_gas)
    ids_elec.remove('Timestamp')
    ids_gas.remove('Timestamp')
    df_id_elec = df[df['Building_Number'].isin(ids_elec)]
    df_id_gas = df[df['Building_Number'].isin(ids_gas)]
    stations = np.intersect1d(df_id_elec['ICAO'].unique(), df_id_elec['ICAO'].unique())
    with conn:
        df_id_elec.to_sql('electric_id_station', conn, if_exists='replace')
        df_id_gas.to_sql('gas_id_station', conn, if_exists='replace')
    conn_w = uo.connect('weather_hourly_utc')
    print len(stations)
Exemple #30
0
def study_set_plot():
    conn = uo.connect('all')
    with conn:
        # df1 = pd.read_sql('SELECT DISTINCT Building_Number, Fiscal_Year FROM EUAS_monthly', conn)
        df1 = pd.read_sql(
            'SELECT DISTINCT Building_Number, Fiscal_Year FROM eui_by_fy',
            conn)
        df2 = pd.read_sql('SELECT Building_Number, Cat FROM EUAS_category',
                          conn)
    meter_set = gbs.get_action_set('high_level_ECM', ['Advanced Metering'])
    df = pd.merge(df1, df2, on='Building_Number', how='left')
    df = df[df['Fiscal_Year'] > 2006]
    df = df[df['Fiscal_Year'] < 2016]
    df3 = df.groupby('Building_Number').filter(lambda x: len(x) > 5)
    ai_set = gbs.get_cat_set(['A', 'I'], conn)
    # invest = gbs.get_invest_set()[-1]
    invest = gbs.get_ecm_set()
    all_building = set(df3['Building_Number'].tolist())
    print 'all building > 5 years of data: {0}'.format(len(all_building))
    print 'all building > 5 years of data + ecm: {0}'.format(
        len(all_building.intersection(invest)))
    print 'all building > 5 years of data + meter: {0}'.format(
        len(all_building.intersection(meter_set)))

    df4 = df[df['Cat'].isin(
        ['A', 'I'])].groupby('Building_Number').filter(lambda x: len(x) > 5)
    ai_building = set(df4['Building_Number'].tolist())
    print 'A + I building > 5 years of data: {0}'.format(len(ai_building))
    print 'A + I building > 5 years of data + ecm: {0}'.format(
        len(ai_building.intersection(invest)))
    print 'A + I building > 5 years of data + meter: {0}'.format(
        len(ai_building.intersection(meter_set)))

    print 'elec ', len(gbs.get_energy_set('elec').intersection(ai_set))
    print 'elec + ecm', len(
        gbs.get_energy_set('elec').intersection(ai_set).intersection(invest))
    print 'elec + meter', len(
        gbs.get_energy_set('elec').intersection(ai_set).intersection(
            meter_set))

    print 'gas ', len(gbs.get_energy_set('gas').intersection(ai_set))
    print 'gas + ecm', len(
        gbs.get_energy_set('gas').intersection(ai_set).intersection(invest))
    print 'gas + meter', len(
        gbs.get_energy_set('gas').intersection(ai_set).intersection(meter_set))
    print 'eui', len(gbs.get_energy_set('eui').intersection(ai_set))
    print 'eui + ecm', len(
        gbs.get_energy_set('eui').intersection(ai_set).intersection(invest))
    print 'eui + meter', len(
        gbs.get_energy_set('eui').intersection(ai_set).intersection(meter_set))
    return
Exemple #31
0
def ion_gsalink_time():
    df = pd.read_csv(os.getcwd() + \
                     '/input/FY/interval/ion_0627/summary_long/summary_electric.csv')
    df = df[['Building_Number', 'min_time']]
    conn = uo.connect('other_input')
    with conn:
        df_gsalink = pd.read_sql('SELECT Building_ID as Building_Number, Rollout_Date as GSALink_start_time FROM  GSAlink_Buildings_First_55_Opiton_26_Start_Stop_Dates', conn)
    df_all = pd.merge(df, df_gsalink, on='Building_Number', how='left')
    df_all.rename(columns={'min_time': 'ION_start_time'}, inplace=True)
    df_all['GSALink_start_time'] = df_all['GSALink_start_time'].map(lambda x: np.nan if type(x) == float else datetime.strptime(x, '%Y/%m/%d').strftime('%Y-%m-%d'))
    df_all['ION_start_time'] = df_all['ION_start_time'].map(lambda x: np.nan if type(x) == float else datetime.strptime(x, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d'))
    df_all['days_diff'] = (pd.to_datetime(df_all['GSALink_start_time']) - pd.to_datetime(df_all['ION_start_time']))/np.timedelta64(1, 'D')
    df_all.to_csv(os.getcwd() + '/input/FY/interval/ion_0627/ion_gsalink_start.csv')
    print len(df_all[df_all['days_diff'] > 100])
    return
Exemple #32
0
def plot_trend_per_dd_fromdb(b, s, breakpoints):
    conn = uo.connect('all')
    with conn:
        df_all = pd.read_sql('SELECT * FROM EUAS_monthly_weather WHERE Building_Number = \'{0}\''.format(b), conn)
    conn.close()
    df_all.sort(['year', 'month'], inplace=True)
    df_all = df_all[['year', 'month', 'eui_elec', 'eui_gas', 'hdd65', 'cdd65']]
    df_all = df_all[(df_all['year'] < 2016) & (df_all['year'] > 2002)]
    df_agg = df_all.groupby('year').sum()
    df_agg.drop('month', axis=1, inplace=True)
    df_agg.reset_index(inplace=True)
    df_agg['Date'] = df_agg.apply(lambda r: datetime.datetime(int(r['year']), 1, 1) if not np.isnan(r['year']) else np.nan, axis=1)
    df_agg['eui_gas_perdd'] = df_agg.apply(lambda r: r['eui_gas'] / r['hdd65'] if r['hdd65'] > 0 else np.nan, axis=1)
    df_agg['eui_elec_perdd'] = df_agg.apply(lambda r: r['eui_elec'] / r['cdd65'] if r['cdd65'] > 0 else np.nan, axis=1)
    df_agg.set_index('Date', inplace=True)
    sns.set_style("whitegrid")
    sns.set_context("talk", font_scale=1)
    bx = plt.axes()
    gas_line_color = '#DE4A50'
    elec_line_color = '#429CD5'
    ylimit = max(df_agg['eui_gas_perdd'].max(),
                 df_agg['eui_elec_perdd'].max()) * 1.1
    line1, = plt.plot(df_agg.index, df_agg['eui_gas_perdd'], ls='-',
                      lw=2, marker='o', color=gas_line_color)
    line2, = plt.plot(df_agg.index, df_agg['eui_elec_perdd'], ls='-',
                      lw=2, marker='o', color=elec_line_color)
    hdd = df_agg['hdd65'].tolist()
    hdd = [int(round(x, 0)) for x in hdd]
    cdd = df_agg['cdd65'].tolist()
    cdd = [int(round(x, 0)) for x in cdd]
    for m, n, d in zip(df_agg.index, df_agg['eui_gas_perdd'], hdd):
        bx.annotate('HDD\n{0}'.format(d), xy=(m, n))
    for m, n, d in zip(df_agg.index, df_agg['eui_elec_perdd'], cdd):
        bx.annotate('CDD\n{0}'.format(d), xy=(m, n))
    for bp in breakpoints:
        x = pd.to_datetime([pd.to_datetime(bp)] * 2)
        plt.plot(x, [0, ylimit], ls='--', lw=2, color='gray')
    plt.legend([line1, line2], ['Gas', 'Electric'],
               loc='center left', bbox_to_anchor=(1, 0.5),
               prop={'size':10})
    plt.ylim((0, ylimit))
    plt.title("Electric EUI per degree day (65F) and Gas EUI per degree day (65F) Trend")
    plt.ylabel("[kBtu/(sq.ft*year*degree day(65F)]")
    plt.xlabel("Calendar Year")
    # plt.show()
    P.savefig(os.getcwd() + '/plot_FY_weather/html/single_building/trend/{0}_{1}_year_perdd.png'.format(b, s), dpi = 70)
    plt.close()
    return
Exemple #33
0
def change_area():
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql('SELECT Building_Number, [Gross_Sq.Ft] FROM EUAS_area', conn)
    df_max = df.groupby('Building_Number').max()
    df_min = df.groupby('Building_Number').min()
    df_all = pd.merge(df_max, df_min, how='inner', left_index=True, right_index=True, suffixes=['_max', '_min'])
    df_all['diff'] = df_all['Gross_Sq.Ft_max'] - \
                     df_all['Gross_Sq.Ft_min']
    df_all['percent_diff'] = df_all.apply(lambda r: np.nan if r['Gross_Sq.Ft_max'] == 0 else (1 - r['Gross_Sq.Ft_min']/r['Gross_Sq.Ft_max']) * 100, axis=1)
    df_large = df_all[df_all['percent_diff'] > 10]
    print len(df_large)
    df_large.drop('diff', axis=1, inplace=True)
    print df_large.head()
    df_large.to_csv(homedir + 'question/change_area.csv', index=True)
    return
Exemple #34
0
def plot_lean_by_reion(region):
    conn = uo.connect('all')
    with conn:
        df_region = pd.read_sql(
            'SELECT DISTINCT Building_Number, [Region_No.] FROM EUAS_monthly',
            conn)
        df_bs = pd.read_sql('SELECT * FROM EUAS_monthly_weather', conn)
        print len(df_bs)
        df_bs = df_bs.merge(df_region, on='Building_Number', how='left')
        df_bs = df_bs[df_bs['Region_No.'] == '9']
        print len(df_bs)
    bs_dict = dict(zip(df_bs['Building_Number'], df_bs['ICAO']))
    for b in bs_dict:
        s = bs_dict[b]
        result = ltm.lean_temperature_fromdb(b, s, 2, timerange=None)
        if result == None:
            result = (None, None, None)
Exemple #35
0
def invest_cnt():
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql(
            'SELECT DISTINCT Building_Number, high_level_ECM, detail_level_ECM FROM EUAS_ecm WHERE detail_level_ECM != \'GSALink\'',
            conn)
    eng_set = gbs.get_energy_set('eui')
    ai_set = gbs.get_cat_set(['A', 'I'], conn)
    study_set = eng_set.intersection(ai_set)
    df = df[df['Building_Number'].isin(study_set)]
    print df.groupby(['high_level_ECM', 'detail_level_ECM']).count()
    print len(df)
    df = df.groupby(['Building_Number']).filter(lambda x: len(x) == 1)
    print len(df)
    print df.head()
    print df.groupby(['high_level_ECM', 'detail_level_ECM']).count()
    return
Exemple #36
0
def exclude():
    df = pd.read_csv(os.getcwd() + '/input/FY/excluded_buildings.csv')
    df['exclude'] = 'Yes'
    conn = uo.connect('all')
    with conn:
        df1 = pd.read_sql(
            'SELECT DISTINCT Building_Number FROM EUAS_monthly WHERE Fiscal_Year = \'2015\'',
            conn)
        df2 = pd.read_sql('SELECT Building_Number, Cat FROM EUAS_category',
                          conn)
    df3 = pd.merge(df1, df2, on='Building_Number', how='left')
    df4 = pd.merge(df3, df, on='Building_Number', how='left')
    # df4 = df4[~df4['Cat'].isin(['A', 'I'])]
    print df4.head()
    conn.close()
    df4.to_csv(homedir + 'temp/exclude_2015.csv', index=False)
    print 'end'
Exemple #37
0
def count_invest():
    conn = uo.connect('all')
    with conn:
        df1 = pd.read_sql(
            'SELECT DISTINCT Building_Number, high_level_ECM FROM EUAS_ecm WHERE high_level_ECM != \'GSALink\'',
            conn)
        df2 = pd.read_sql(
            'SELECT DISTINCT Building_Number, ECM_program FROM EUAS_ecm_program',
            conn)
    eng_set = gbs.get_energy_set('eui')
    ai_set = gbs.get_cat_set(['A', 'I'], conn)
    study_set = eng_set.intersection(ai_set)
    df1.dropna(subset=['high_level_ECM'], inplace=True)
    df2.dropna(subset=['ECM_program'], inplace=True)
    df1 = df1[df1['Building_Number'].isin(study_set)]
    df2 = df2[df2['Building_Number'].isin(study_set)]
    print df1['high_level_ECM'].value_counts()
    print df2['ECM_program'].value_counts()
    return
Exemple #38
0
def plot_scatter(hue_str, measure_type):
    conn = uo.connect('interval_ion_single')
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    sns.set_style("whitegrid")
    sns.set_palette("Set2", 8)
    sns.set_context("talk", font_scale=1)
    col_wrap_dict = {'hour': 6, 'month': 4, 'day': 5}
    upper = {'electric': 600, 'gas': 2500}
    ylabel = {'electric': 'electric (kwh)'.title(), 'gas': 'gas (Cubic Feet ?)'.title()}
    for b, s in bs_pair:
        print b, s
        # df_w = pd.read_sql('SELECT * FROM {0} WHERE Timestamp between \'\2015-01-01\' and \'2016-01-01\''.format(s), conn)
        with conn:
            df_w = pd.read_sql('SELECT * FROM {0}'.format(s), conn)
            df_e = pd.read_sql('SELECT Timestamp, {0} FROM {1}'.format(b, measure_type), conn)
            local = pd.to_datetime(df_w['Timestamp']).map(lambda x: x - np.timedelta64(5, 'h'))
            local_str = local.map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
            df_w['Timestamp'] = local_str
        df_all = pd.merge(df_w, df_e, on='Timestamp', how='inner')
        df_all['hour'] = df_all['Timestamp'].map(lambda x: x[11:13]) 
        df_all['month'] = df_all['Timestamp'].map(lambda x: x[5:7]) 
        df_all['day'] = df_all['Timestamp'].map(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').strftime('%a')) 

        if hue_str is None:
            sns.regplot(x='Temperature_F', y=b, data=df_all, fit_reg=False)
            plt.ylabel(ylabel[measure_type])
            plt.title('{0} vs Temperature (F): {1}'.format(ylabel[measure_type], b))
            plt.ylim((0, upper[measure_type]))
        elif hue_str == 'piece':
            df_all = df_all[df_all[b] > 0]
            plot_piecewise(measure_type, df_all, b, s)
        else:
            g = sns.lmplot(x='Temperature_F', y=b, hue=hue_str, col=hue_str, col_wrap=col_wrap_dict[hue_str], size=3, data=df_all, fit_reg=False)
            g.set(ylim=(0, upper[measure_type]))
            g.set(ylabel=ylabel[measure_type])
        plt.gca().set_ylim(bottom=0)
        # plt.show()
        path = os.getcwd() + '/input/FY/interval/ion_0627/{0}_scatter/{1}_{2}_{3}'.format(measure_type, b, s, hue_str)
        P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight')
        plt.close()
    return
Exemple #39
0
def write_robust_energy_set():
    s = gbs.get_energy_set('eui')
    e = gbs.get_energy_set('eui_elec')
    g = gbs.get_energy_set('eui_gas')
    conn = uo.connect('all')
    def get_status(x):
        if x in s:
            return "Electric EUI >= 12 and Gas EUI >= 3for at least 6 years from FY2007 to FY2015"
        elif x in e:
            return "Electric EUI >= 12 for at least 6 years from FY2007 to FY2015, there exist at least 4 years from FY2007 to FY2015 where Gas EUI < 3"
        elif x in g:
            return "Gas EUI >= 3 for at least 6 years from FY2007 to FY2015, there exist at least 4 years from FY2007 to FY2015 where Electric EUI < 12"
        else:
            return "There exist at least 4 years from FY2007 to FY2015 where Gas EUI < 3 and there exist at least 4 years from FY2007 to FY2015 where Electric EUI < 12"
    with conn:
        df = pd.read_sql('SELECT Building_Number, Cat FROM EUAS_category', conn)
    # df['status'] = df['Building_Number'].map(lambda x: "Electric EUI >= 12 and Gas EUI >= 3for at least 6 years from FY2007 to FY2015" if x in s else 'Not robust energy')
    df['status'] = df['Building_Number'].map(get_status)
    df.to_csv(r_input + 'robust_energy_sep.csv', index=False)
Exemple #40
0
def gsalink_facility_id():
    conn = uo.connect('other_input')
    with conn:
        df1 = pd.read_sql(
            'SELECT Building_ID AS Building_Number FROM GSAlink_Buildings_First_55_Opiton_26_Start_Stop_Dates',
            conn)
        df2 = pd.read_sql(
            'SELECT Building_Number, Facility_Number FROM building_facility',
            conn)
    # print len(df2)
    print df1.head()
    # print '${0}$'.format(df1.ix[1, 0])
    # print df2.head()
    # print '${0}$'.format(df2.ix[0, 0])
    df = pd.merge(df1, df2, on='Building_Number', how='left')
    print df.head()
    df.dropna(subset=['Facility_Number'], inplace=True)
    print len(df.groupby('Facility_Number').filter(lambda x: len(x) > 1))
    print len(df.groupby('Facility_Number').filter(lambda x: len(x) == 1))
Exemple #41
0
def plot_trend_fromdb(b, s, breakpoints):
    conn = uo.connect('all')
    with conn:
        df_all = pd.read_sql('SELECT * FROM EUAS_monthly_weather WHERE Building_Number = \'{0}\''.format(b), conn)
    conn.close()
    df_all.sort(['year', 'month'], inplace=True)
    df_all = df_all[['year', 'month', 'eui_elec', 'eui_gas', 'hdd65', 'cdd65']]
    df_all = df_all[(df_all['year'] < 2016) & (df_all['year'] > 2002)]
    print type(df_all['eui_gas'].tolist()[0])
    byyear = df_all.groupby(['year']).sum()
    byyear.index = byyear.index.map(lambda x: datetime.datetime(int(x), 1, 1))
    sns.set_style("whitegrid")
    sns.set_context("talk", font_scale=1)
    bx = plt.axes()
    gas_line_color = '#DE4A50'
    elec_line_color = '#429CD5'
    hdd = byyear['hdd65'].tolist()
    hdd = [int(round(x, 0)) for x in hdd]
    cdd = byyear['cdd65'].tolist()
    cdd = [int(round(x, 0)) for x in cdd]
    bx = plt.axes()
    line1, = plt.plot(byyear.index, byyear['eui_gas'], ls='-', lw=2,
                      marker='o', color=gas_line_color)
    line2, = plt.plot(byyear.index, byyear['eui_elec'], ls='-', lw=2,
                      marker='o', color=elec_line_color)
    for m, n, d in zip(byyear.index, byyear['eui_gas'], hdd):
        bx.annotate('HDD\n{0}'.format(d), xy=(m, n))
    for m, n, d in zip(byyear.index, byyear['eui_elec'], cdd):
        bx.annotate('CDD\n{0}'.format(d), xy=(m, n))
    for bp in breakpoints:
        x = pd.to_datetime([pd.to_datetime(bp)] * 2)
        y = bx.get_ylim()
        plt.plot(x, y, ls='--', lw=2, color='gray')
    plt.legend([line1, line2], ['Gas', 'Electric'],
               loc='center left', bbox_to_anchor=(1, 0.5),
               prop={'size':10})
    plt.title("Electric EUI and Gas EUI Trend")
    plt.ylabel("[kBtu/sq.ft/year]")
    plt.xlabel("Calendar Year")
    P.savefig(os.getcwd() + '/plot_FY_weather/html/single_building/trend/{0}_{1}_year.png'.format(b, s), dpi = 70)
    plt.close()
    return
Exemple #42
0
def plot_outlier(measure_type):
    conn = uo.connect('interval_ion')
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    col = value_lb_dict[measure_type]
    print len(bs_pair)
    for b, s in bs_pair[:1]:
        print b, s
        with conn:
            df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, b), conn)
        df = df[df[col] >= 0]
        points = df[col]
        outliers_mild, outliers_extreme = box_based_roll_outlier(points, nb_size=1000)
        maxi = max(points)
        df['outlier_mild'] = map(lambda x: maxi * 0.5 if x else np.nan, outliers_mild)
        df['outlier_extreme'] = map(lambda x: maxi * 0.75 if x else np.nan, outliers_extreme)
        df.to_csv(homedir + 'temp/{0}.csv'.format(b), index=False)
    return
def plot_outlier(measure_type):
    conn = uo.connect('interval_ion')
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    col = value_lb_dict[measure_type]
    print len(bs_pair)
    for b, s in bs_pair[:1]:
        print b, s
        with conn:
            df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, b), conn)
        df = df[df[col] >= 0]
        points = df[col]
        outliers_mild, outliers_extreme = box_based_roll_outlier(points, nb_size=1000)
        maxi = max(points)
        df['outlier_mild'] = map(lambda x: maxi * 0.5 if x else np.nan, outliers_mild)
        df['outlier_extreme'] = map(lambda x: maxi * 0.75 if x else np.nan, outliers_extreme)
        df.to_csv(homedir + 'temp/{0}.csv'.format(b), index=False)
    return
Exemple #44
0
def write_robust_energy_set():
    s = gbs.get_energy_set('eui')
    e = gbs.get_energy_set('eui_elec')
    g = gbs.get_energy_set('eui_gas')
    conn = uo.connect('all')

    def get_status(x):
        if x in s:
            return "Electric EUI >= 12 and Gas EUI >= 3for at least 6 years from FY2007 to FY2015"
        elif x in e:
            return "Electric EUI >= 12 for at least 6 years from FY2007 to FY2015, there exist at least 4 years from FY2007 to FY2015 where Gas EUI < 3"
        elif x in g:
            return "Gas EUI >= 3 for at least 6 years from FY2007 to FY2015, there exist at least 4 years from FY2007 to FY2015 where Electric EUI < 12"
        else:
            return "There exist at least 4 years from FY2007 to FY2015 where Gas EUI < 3 and there exist at least 4 years from FY2007 to FY2015 where Electric EUI < 12"

    with conn:
        df = pd.read_sql('SELECT Building_Number, Cat FROM EUAS_category',
                         conn)
    # df['status'] = df['Building_Number'].map(lambda x: "Electric EUI >= 12 and Gas EUI >= 3for at least 6 years from FY2007 to FY2015" if x in s else 'Not robust energy')
    df['status'] = df['Building_Number'].map(get_status)
    df.to_csv(r_input + 'robust_energy_sep.csv', index=False)
Exemple #45
0
def check_interval(filename):
    df = pd.read_csv(inputdir + filename)
    df.rename(columns=lambda x: x[:8] if x != 'Timestamp' else x, inplace=True)
    df.dropna(axis=1, how='all', inplace=True)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df.set_index(pd.DatetimeIndex(df['Timestamp']), inplace=True)
    # df.info()
    df_re = df.resample('M', how='sum')
    cols = list(df_re)
    df_re.reset_index(inplace=True)
    df_long = pd.melt(df_re, id_vars='index', value_vars=cols)
    # print
    # print df_long.head()
    df_long.rename(columns={
        'index': 'Timestamp',
        'variable': 'Building_Number',
        'value': 'Electricity_(KWH)'
    },
                   inplace=True)
    df_long['month'] = df_long['Timestamp'].map(lambda x: x.month)
    df_long['year'] = df_long['Timestamp'].map(lambda x: x.year)
    col_str = ','.join(['\'{0}\''.format(x) for x in cols])
    conn = uo.connect('all')
    with conn:
        df = pd.read_sql(
            'SELECT Building_Number, year, month, [Electricity_(KWH)] FROM EUAS_monthly WHERE Building_Number IN ({0}) AND year = \'2015\''
            .format(col_str), conn)
    # print df.head()
    df_long.drop('Timestamp', axis=1, inplace=True)
    df_all = pd.merge(df,
                      df_long,
                      how='left',
                      on=['Building_Number', 'year', 'month'],
                      suffixes=['_EUAS', '_ION'])
    df_all['ratio'] = df_all['Electricity_(KWH)_ION'] / df_all[
        'Electricity_(KWH)_EUAS'].map(lambda x: round(x, 3))
    df_all['percent_diff'] = df_all['ratio'].map(lambda x: abs(1 - x) * 100.0)
    # print df_all.head()
    return df_all
Exemple #46
0
def pca_var():
    conn = uo.connect('weather_hourly_utc')
    with conn:
        df = pd.read_sql('SELECT * FROM downloaded', conn)
    nums95 = []
    nums99 = []
    ss = []
    stations = df['ICAO'].tolist()
    for s in stations:
        with conn:
            df_temp = pd.read_sql('SELECT Temperature_F FROM {0}'.format(s),
                                  conn)
        n95, n99 = get_num_pc(df_temp['Temperature_F'], cuts=[0.95, 0.99])
        nums95.append(n95)
        nums99.append(n99)
        ss.append(s)
        print s, n95, n99
    df_out = pd.DataFrame({
        'ICAO': ss,
        'num_pc_95_percent': nums95,
        'num_pc_99_percent': nums99
    })
    df_out.to_csv(homedir + 'num_pc.csv')
Exemple #47
0
def ion_gsalink_time():
    df = pd.read_csv(os.getcwd() + \
                     '/input/FY/interval/ion_0627/summary_long/summary_electric.csv')
    df = df[['Building_Number', 'min_time']]
    conn = uo.connect('other_input')
    with conn:
        df_gsalink = pd.read_sql(
            'SELECT Building_ID as Building_Number, Rollout_Date as GSALink_start_time FROM  GSAlink_Buildings_First_55_Opiton_26_Start_Stop_Dates',
            conn)
    df_all = pd.merge(df, df_gsalink, on='Building_Number', how='left')
    df_all.rename(columns={'min_time': 'ION_start_time'}, inplace=True)
    df_all['GSALink_start_time'] = df_all['GSALink_start_time'].map(
        lambda x: np.nan if type(x) == float else datetime.strptime(
            x, '%Y/%m/%d').strftime('%Y-%m-%d'))
    df_all['ION_start_time'] = df_all['ION_start_time'].map(
        lambda x: np.nan if type(x) == float else datetime.strptime(
            x, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d'))
    df_all['days_diff'] = (
        pd.to_datetime(df_all['GSALink_start_time']) -
        pd.to_datetime(df_all['ION_start_time'])) / np.timedelta64(1, 'D')
    df_all.to_csv(os.getcwd() +
                  '/input/FY/interval/ion_0627/ion_gsalink_start.csv')
    print len(df_all[df_all['days_diff'] > 100])
    return
Exemple #48
0
def plot_action_fromdb():
    conn = uo.connect('all')
    with conn:
        df_action = pd.read_sql('SELECT * FROM EUAS_ecm', conn)
        df_pro = pd.read_sql('SELECT * FROM EUAS_ecm_program', conn)
        df_bs = pd.read_sql('SELECT * FROM EUAS_monthly_weather', conn)
    bs_dict = dict(zip(df_bs['Building_Number'], df_bs['ICAO']))
    df_action = df_action[['Building_Number', 'high_level_ECM', 'detail_level_ECM', 'Substantial_Completion_Date']]
    df_act = df_action.copy()
    df_act = df_act.dropna()
    df_act['action'] = df_act.apply(lambda r: r['high_level_ECM'] + ' -- ' + r['detail_level_ECM'], axis=1)
    df_act.drop(['high_level_ECM', 'detail_level_ECM'], axis=1,
                inplace=True)
    gr = df_act.groupby('Building_Number')
    names = list(gr.groups)
    # print names.index('WA0120BN')
    lines = ['Building_Number,Time,Action,Electric_Saving,Gas_Saving,Electric_Before,Electric_After,Gas_Before,Gas_After,Electric_CVRMSE,Gas_CVRMSE']
    # FIXME: PA0060ZZ has None in eui_gas
    del names[153]
    # names = ['CA0306ZZ']
    for i, name in enumerate(names):
        print i, name, '222222222222222222222222222222'
        group = gr.get_group(name)
        df_temp = group.groupby(['Substantial_Completion_Date'])['action'].apply(lambda x: '\n'.join(x))
        df_show = df_temp.to_frame('ECM action')
        # df_show['Building Number'] = 'CT0013ZZ'
        df_show.reset_index(inplace=True)
        df_show['Substantial_Completion_Date'] = pd.to_datetime(df_show['Substantial_Completion_Date'])
        df_show.sort('Substantial_Completion_Date', inplace=True)
        days_diff = useq.dist_between_adjacent(df_show['Substantial_Completion_Date'].tolist())
        pair = zip(df_show['Substantial_Completion_Date'].tolist(),
                   df_show['ECM action'].tolist())
        str_pair = [('{0}-{1}-{2}'.format(x[0].year, x[0].month, x[0].day), x[1]) for x in pair]
        breakpoints = [x[0] for x in str_pair]
        print breakpoints
        assert(len(breakpoints) > 0)
        ranges = ['before {0}'.format(breakpoints[0])] + useq.merge_adjacent(breakpoints, lambda x, y: '{0} -- {1}'.format(x, y)) + ['after {0}'.format(breakpoints[-1])]
        actionpoints = [x[1] for x in str_pair]
        actions = ['pre {0}'.format(actionpoints[0])] + useq.merge_adjacent(actionpoints, lambda x, y: 'post {0} pre {1}'.format(x, y)) + ['post {0}'.format(actionpoints[-1])]
        b = name
        # df_eng = gr_energy.get_group(b)
        # df_eng.reset_index(inplace=True)
        s = bs_dict[b]
        ar_pair = zip(actions, ranges)
        # un-comment if need to plot energy and dd trend side by side
        # plot_trend_fromdb(b, s, breakpoints)

        print 'plot trend per dd'
        plot_trend_per_dd_fromdb(b, s, breakpoints)

        results = []
        for a, r in zip(actions, ranges):
            result = ltm.lean_temperature_fromdb(b, s, 2, r, action=a)
            if result == None:
                result = (None, None, None)
            d = {'building': b, 'station': s, 'timerange': r, 'action': a}
            result += tuple([d])
            results.append(result)
        length = len(results)
        if length == 0:
            print 'no lean plot generated'
            continue
        def merge_action(string):
            string = string.replace('\n', ';')
            string = string.replace('pre ', '')
            return string
        def concat(string):
            return string.replace('\n', ';')
        for i in range(len(results) - 1):
            d_save, d_cvrmse = plot_saving_fromdb(b, s, results[i], results[i + 1])
            lines.append(','.join(map(str, [b, breakpoints[i],
                                            # (merge_action(actions[i])),
                                            concat(actionpoints[i]),
                                            d_save['elec_percent'],
                                            d_save['gas_percent'],
                                            d_save['elec_before'],
                                            d_save['elec_after'],
                                            d_save['gas_before'],
                                            d_save['gas_after'],
                                            d_cvrmse['elec'],d_cvrmse['gas']])))
        process_html(b, s, results, breakpoints)
    with open (os.getcwd() + '/plot_FY_weather/html/table/action_saving.csv', 'w+') as wt:
        wt.write('\n'.join(lines))
def piecewise_reg_one_fromdb(b, s, n_par, theme, cuttail, timerange=None, *args):
    sns.set_style("whitegrid")
    sns.set_palette("Set2")
    sns.set_context("paper", font_scale=1)
    if len(args) == 0:
        conn = uo.connect('all')
        with conn:
            df = pd.read_sql('SELECT * FROM EUAS_monthly_weather WHERE Building_Number = \'{0}\''.format(b), conn)
        conn.close()
    else:
        df = args[0]
    # df = df[df[s].notnull()]
    # df['year'] = df['timestamp'].map(lambda x: int(x[:4]))
    # df['month'] = df['timestamp'].map(lambda x: int(x[5:7]))
    df['timestamp'] = df.apply(lambda r: '{0}-{1}-1'.format(int(r['year']), int(r['month'])), axis=1)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.sort(['year', 'month'], inplace=True)
    if timerange == None:
        df = df.tail(n=36)
    else:
        yearcol, timefilter = util.get_time_filter(timerange)
        df['in_range'] = df[yearcol].map(timefilter)
        df = df[df['in_range']]
    if len(df) > 36 and cuttail:
        df = df.tail(n=36)
    if len(df) < 6:
        print 'not enough data points in {0}'.format(timerange)
        return None
    if 'day' in df:
        df = df[['timestamp', 'year', 'month', 'ave', theme, 'day']]
    else:
        df = df[['timestamp', 'year', 'month', 'ave', theme]]
    df = df[df['ave'].notnull()]
    x = np.array(df['ave'])
    y = np.array(df[theme])
    # t = np.array(df['begin_time'])
    t = np.array(df['timestamp'])
    x_min = x.min()
    x_max = x.max()
    break_low = 40
    break_high = 81
    xd = np.linspace(x_min, x_max, 150)
    cvrmses = []
    ps = []
    slope_side = []
    breakpoint_cal = []
    if n_par == 2:
        breakpoints = range(break_low, break_high)
        for breakpoint in breakpoints:
            def piecewise_linear_leftslope(x, k, intercept):
                return np.piecewise(x, [x < breakpoint, x >= breakpoint], [lambda x:k * x + intercept, lambda x:k * breakpoint + intercept])
            p1 , e1 = optimize.curve_fit(piecewise_linear_leftslope,
                                         x, y)
            cvrmse = CVRMSE(x, y, p1, piecewise_linear_leftslope,
                            n_par)
            cvrmses.append(cvrmse)
            ps.append(p1)
            slope_side.append('left')
            breakpoint_cal.append(breakpoint)

            if theme == 'eui_elec':
                def piecewise_linear_rightslope(x, k, intercept):
                    return np.piecewise(x, [x >= breakpoint, x < breakpoint], [lambda x:k * x + intercept, lambda x:k * breakpoint + intercept])
                p2 , e2 = optimize.curve_fit(piecewise_linear_rightslope,
                                            x, y)
                cvrmse = CVRMSE(x, y, p2, piecewise_linear_rightslope,
                                n_par)
                cvrmses.append(cvrmse)
                ps.append(p2)
                slope_side.append('right')
                breakpoint_cal.append(breakpoint)
        result = sorted(zip(breakpoint_cal, cvrmses, ps, slope_side), key=lambda x: x[1])
    elif n_par == 3:
        breakpoints = [(i, j) for i in range(break_low, break_high) for j in range(i + 1, break_high)]
        for (break_1, break_2) in breakpoints:
            def piecewise_linear(x, k1, b1, k2):
                x0 = break_1
                x1 = break_2
                y0 = k1 * x0 + b1
                y1 = y0
                return np.piecewise(x, [x < x0, x >= x1], [lambda x:k1 * x + b1, lambda x:k2 * (x - x1) + y1, lambda x:y0])
            p , e = optimize.curve_fit(piecewise_linear, x, y)
            cvrmse = CVRMSE(x, y, p, piecewise_linear, n_par)
            cvrmses.append(cvrmse)
            ps.append(p)
            slope_side.append('NA')
        result = sorted(zip(breakpoints, cvrmses, ps, slope_side), key=lambda x: x[1])
    best = result[0]
    b_point_opt = best[0]
    p_opt = best[2]
    slope_side_opt = best[3]
    cvrmse_opt = best[1]
    # print theme, slope_side_opt, p_opt
    # print 'breakpoint: {1}, error: {0}'.format(cvrmse_opt, b_point_opt)
    if n_par == 2:
        if slope_side_opt == 'left':
            def piecewise_linear(x, k, intercept):
                return np.piecewise(x, [x < b_point_opt, x >= b_point_opt], [lambda x:k * x + intercept, lambda x:k * b_point_opt + intercept])
        else:
            assert(slope_side_opt == 'right')
            def piecewise_linear(x, k, intercept):
                return np.piecewise(x, [x >= b_point_opt, x < b_point_opt], [lambda x:k * x + intercept, lambda x:k * b_point_opt + intercept])
    elif n_par == 3:
        def piecewise_linear(x, k1, b1, k2):
            x0 = b_point_opt[0]
            x1 = b_point_opt[1]
            y0 = k1 * x0 + b1
            y1 = y0
            return np.piecewise(x, [x < x0, x >= x1], [lambda x:k1 * x + b1, lambda x:k2 * (x - x1) + y1, lambda x:y0])
    if abs(p_opt[0] - 0) < 1e-20:
        print 'all zero {0} consumption: {1}'.format(theme, p_opt)
        good_regression = False
        return None
    # bx = plt.axes()
    # bx.plot(x, y, "o")
    # g = sns.lmplot(x=s, y=theme, hue='day', data=df, fit_reg=False)
    # # print piecewise_linear(xd, *p_opt)
    # plt.ylabel('kBtu')
    # # plt.gca().set_ylim(bottom=0)
    # ax = g.axes
    # ax[0, 0].set_ylim((0, 1e5))
    # P.savefig('{3}scatter_{0}_{1}_{2}.png'.format(b, s, theme, image_output_dir), dpi = 150)
    # # plt.show()
    # plt.close()
    # g = sns.lmplot(x=s, y=theme, col='day', hue='day', col_wrap=3, size=3, data=df, fit_reg=False)
    # ax = g.axes
    # for i in ax:
    #     i.set_ylim((0, 1e5))
    # P.savefig('{3}scatter_{0}_{1}_{2}_3b3.png'.format(b, s, theme, image_output_dir), dpi = 150)
    # plt.close()

    plt.plot(x, y, "o")
    plt.plot(xd, piecewise_linear(xd, *p_opt), "-")
    plt.title('break point {0}F, CV(RMSE): {1}'.format(b_point_opt, cvrmse_opt))
    # plt.show()
    # P.savefig('{3}regression_{0}_{1}_{2}.png'.format(b, s, theme, image_output_dir), dpi = 150)
    plt.close()
    good_regression = True
    # remove electric heating
    if theme == 'eui_elec':
        if slope_side_opt == 'left':
            print 'bad electric regression left side -----------------'
            good_regression = False
        if slope_side_opt == 'right' and p_opt[0] < 0:
            print 'bad electric regression right side ----------------'
            good_regression = False
    if theme == 'eui_gas':
        if slope_side_opt == 'left' and p_opt[0] > 0:
            print 'bad gas regression left side ----------------------'
            good_regression = False
        if slope_side_opt == 'right' > 0:
            print 'bad gas regression right side ---------------------'
            good_regression = False
    return {'breakpoint': b_point_opt, 'CV(RMSE)': best[1],
            'regression_par': p_opt, 'x_range': (x_min, x_max), 'fun':
            piecewise_linear, 'x': x, 'y': y, 'good_regression':
            good_regression, 'df': df}
Exemple #50
0
def temp():
    conn = uo.connect('interval_ion')
    with conn:
        df = pd.read_sql('SELECT * FROM area', conn)
    df.to_csv(homedir + 'temp/area.csv')
Exemple #51
0
def fit(measure_type, year=None):
    conn = uo.connect('interval_ion')
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
        df_area = pd.read_sql('SELECT * FROM area', conn)
    df_area.set_index('Building_Number', inplace=True)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    sns.set_style("whitegrid")
    sns.set_palette("Set2", 2)
    sns.set_context("talk", font_scale=1)
    # col_wrap_dict = {'hour': 6, 'month': 4, 'day': 5, 'status':2}
    # upper = {'electric': 600, 'gas': 2500}
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    multiplier_dict = {'electric':  3.412, 'gas': 1.026}
    col = value_lb_dict[measure_type]
    m = multiplier_dict[measure_type]
    ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'}
    # test = ['TN0088ZZ', 'TX0057ZZ', 'NY0281ZZ', 'NY0304ZZ', 'MO0106ZZ']
    # test = ['NM0050ZZ']
    # bs_pair = [x for x in bs_pair if x[0] in test]
    lines = ['Building_Number,week night save%,weekend day save%,weekend night save%,aggregate save%,CVRMSE week day,CVRMSE week night,CVRMSE weekend day,CVRMSE weekend night']
    print len(bs_pair)
    # bs_pair = bs_pair[:1]
    for b, s in bs_pair:
        print b, s
        try:
            area = df_area.ix[b, 'Gross_Sq.Ft']
        except KeyError:
            print 'No area found'
            continue
        df = join_interval(b, s, area, col, m, measure_type, conn, year)
        if len(df) == 0:
            continue
        df.to_csv(homedir + 'temp/{0}.csv'.format(b))
        df = df[df[col] >= 0]
        points = df[col]
        # outliers = show_outlier(points, b, 'upper', measure_type, 5)
        outliers = show_outlier(points, b, 'upper', measure_type, 1.5)
        df['outlier'] = outliers
        df = df[~np.array(outliers)]
        df['status_week_day_night'] = \
            df.apply(lambda r: util.get_status(r['hour'], r['day']), axis=1)
        min_time = df['Timestamp'].min()
        max_time = df['Timestamp'].max()
        sns.set_style("whitegrid")
        colors = sns.color_palette('Paired', 16)
        colors_rgb = [util.float2hex(x) for x in colors]
        sns.set_context("talk", font_scale=1)
        gr = df.groupby('status_week_day_night')
        f, axarr = plt.subplots(2, 2, sharex=True, sharey=True)
        d0 = plot_piece(gr, axarr[0, 0], 'week day', colors_rgb[0], measure_type, b, s)
        if not d0 is None:
            axarr[0, 0].set_title('{0}\nbreak point {1}F, CV(RMSE): {2:.3f}'.format('week day', d0['breakpoint'], d0['CV(RMSE)']))
        d1 = plot_piece(gr, axarr[0, 1], 'week night', colors_rgb[1], measure_type, b, s)
        d2 = plot_piece(gr, axarr[1, 0], 'weekend day', colors_rgb[2], measure_type, b, s)
        d3 = plot_piece(gr, axarr[1, 1], 'weekend night', colors_rgb[3], measure_type, b, s)
        save, err = compute_saving_all(d0, d1, d2, d3, axarr)
        plt.suptitle('{0} -- {1}'.format(min_time, max_time))
        f.text(0.5, 0.04, 'Temperature_F', ha='center', va='center')
        if year is None:
            path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise/{1}/{0}_{1}.png'.format(b, measure_type)
        else:
            path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise/{1}/{0}_{1}_{2}.png'.format(b, measure_type, int(year))
        P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight')
        shutil.copy(path, path.replace('input/FY/interval/ion_0627/piecewise', 'plot_FY_weather/html/interval/lean'))
        plt.close()
        lines.append(','.join([b] + save + err))
    if year is None:
        table_path = os.getcwd() + '/input/FY/interval/ion_0627/table/{0}_save.csv'.format(measure_type)
    else:
        table_path = os.getcwd() + '/input/FY/interval/ion_0627/table/{0}_save_{1}.csv'.format(measure_type, int(year))
    with open(table_path, 'w+') as wt:
        wt.write('\n'.join(lines))
    return
def temp():
    conn = uo.connect('interval_ion')
    with conn:
        df = pd.read_sql('SELECT * FROM area', conn)
    df.to_csv(homedir + 'temp/area.csv')
Exemple #53
0
def read_interval_building(b):
    conn = uo.connect('interval_ion')
    with conn:
        df = pd.read_sql('SELECT * FROM electric WHERE Building_Number = \'{0}\''.format(b), conn)
    df.to_csv(homedir + 'temp/{0}_int.csv'.format(b))
def read_interval_building(b):
    conn = uo.connect('interval_ion')
    with conn:
        df = pd.read_sql('SELECT * FROM electric WHERE Building_Number = \'{0}\''.format(b), conn)
    df.to_csv(homedir + 'temp/{0}_int.csv'.format(b))
Exemple #55
0
def plot_scatter_long(hue_str, measure_type):
    conn = uo.connect('interval_ion')
    with conn:
        df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn)
        df_area = pd.read_sql('SELECT * FROM area', conn)
    df_area.set_index('Building_Number', inplace=True)
    bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO'])
    sns.set_style("whitegrid")
    sns.set_palette("Set2", 2)
    sns.set_context("talk", font_scale=1.5)
    col_wrap_dict = {'hour': 6, 'month': 4, 'day': 5, 'status':2}
    upper = {'electric': 600, 'gas': 2500}
    value_lb_dict = {'electric': 'Electric_(KWH)', 'gas':
                     'Gas_(CubicFeet)'}
    multiplier_dict = {'electric':  3.412, 'gas': 1.026}
    ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'}
    # test = ['TX0057ZZ', 'NY0281ZZ', 'NY0304ZZ', 'MO0106ZZ']
    # bs_pair = [x for x in bs_pair if x[0] in test]
    for b, s in bs_pair:
        print b, s
        # df_w = pd.read_sql('SELECT * FROM {0} WHERE Timestamp between \'\2015-01-01\' and \'2016-01-01\''.format(s), conn)
        col = value_lb_dict[measure_type]
        m = multiplier_dict[measure_type]
        with conn:
            df_w = pd.read_sql('SELECT * FROM {0}'.format(s), conn)
            df_minute = pd.read_sql('SELECT Timestamp, [{0}] FROM {1} WHERE Building_Number = \'{2}\''.format(col, measure_type, b), conn)
        df_minute['h'] = df_minute['Timestamp'].map(lambda x: x[:-5] + '00:00')
        df_e = df_minute.groupby('h').sum()
        df_e.reset_index(inplace=True)
        df_e.rename(columns={'h': 'Timestamp'}, inplace=True)
        try:
            area = df_area.ix[b, 'Gross_Sq.Ft']
        except KeyError:
            print 'no area'
            continue
        df_e['eui'] = df_e[col] * m / area
        local = pd.to_datetime(df_w['Timestamp']).map(lambda x: x - np.timedelta64(5, 'h'))
        local_str = local.map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
        df_w['Timestamp'] = local_str
        df_all = pd.merge(df_w, df_e, on='Timestamp', how='inner')
        df_all['hour'] = df_all['Timestamp'].map(lambda x: x[11:13]) 
        df_all['month'] = df_all['Timestamp'].map(lambda x: x[5:7]) 
        df_all['year'] = df_all['Timestamp'].map(lambda x: x[:4]) 
        df_all['day'] = df_all['Timestamp'].map(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').strftime('%a')) 
        df_all['status'] = df_all.apply(lambda r: 'other' if float(r['hour']) < 6 or float(r['hour']) > 18 or r['day'] in ['Sat', 'Sun'] else 'work hour', axis=1)
        df_all['status_week_day_night'] = \
            df_all.apply(lambda r: get_status_v0(r['hour'], r['day']),
                         axis=1)
        df_all['status_week_day_night_nonflex'] = \
            df_all.apply(lambda r: get_status_v0(r['hour'], r['day']),
                         axis=1)
        df_all.to_csv('/home/yujiex/Public/{0}.csv'.format(b), index=False)
        df_all['method'] = 'non flex week'
        df_all2 = df_all.copy()
        df_all2['status_week_day_night'] = \
            df_all2.apply(lambda r: get_status(r['hour'], r['day']),
                          axis=1)
        df_all2['method'] = 'flex week'
        if hue_str is None:
            sns.regplot(x='Temperature_F', y='eui', data=df_all, fit_reg=False)
            plt.ylabel(ylabel[measure_type])
            plt.title('{0} vs Temperature (F): {1}'.format(ylabel[measure_type], b))
            # plt.ylim((0, upper[measure_type]))
        elif hue_str is 'status':
            g = sns.lmplot(x='Temperature_F', y='eui', hue=hue_str,
                           hue_order=['work hour', 'other'],
                           palette='husl', size=5, data=df_all,
                           fit_reg=False)
            plt.ylabel(ylabel[measure_type])
            g.set(ylabel=ylabel[measure_type])
            if measure_type == 'electric':
                plt.ylim((0, 0.003 * 4))
            elif measure_type == 'gas':
                plt.ylim((0, 0.008 * 4))
            min_time = df_all['Timestamp'].min()
            max_time = df_all['Timestamp'].max()
            print min_time, max_time
            plt.title('{0} {1} Setback'.format(b,
                                               measure_type.title()))
            plt.suptitle('{0} -- {1}'.format(min_time, max_time))
            plt.subplots_adjust(top=0.85)
        elif hue_str == 'status_week_day_night_nonflex':
            df_plot = df_all.groupby('year').filter(lambda x: len(x) > 7500)
            if len(df_plot) == 0:
                print 'not enough data points'
                continue
            g = sns.lmplot(x='Temperature_F', y='eui', hue=hue_str,
                           col='year',
                           hue_order=['week day', 'weekend day',
                                      'week night', 'weekend night'], 
                           palette='husl',
                           # palette='Paired',
                           size=5, aspect = 1.0, data=df_plot, 
                           fit_reg=False, scatter_kws={'s':8}, 
                           legend=False)
                           # lowess=True, scatter_kws={'s':4})
            plt.ylabel(ylabel[measure_type])
            g.set(ylabel=ylabel[measure_type])
            if measure_type == 'electric':
                plt.ylim((0, 0.003 * 4))
            elif measure_type == 'gas':
                plt.ylim((0, 0.008 * 4))
                # plt.ylim((0, 0.04))
            min_time = df_all['Timestamp'].min()
            max_time = df_all['Timestamp'].max()
            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5),
                       markerscale=3)
            print min_time, max_time
            # plt.title('{0} {1} Setback'.format(b,
            #                                    measure_type.title()))
            # plt.suptitle('{0} -- {1}'.format(min_time, max_time))
            title = '{0} {1} Setback\n{2} -- {3}'.format(b, measure_type.title(), min_time, max_time)
            plt.suptitle(title)
            plt.subplots_adjust(top=0.85)
        elif hue_str == 'status_week_day_night':
            # check distribution of the shooting up ones
            # df_all = df_all[(df_all['Temperature_F'] < 50) & (df_all['Temperature_F'] > 45)]
            # df_all = df_all[df_all['status_week_day_night'] == 'week day']
            # print df_all.describe()
            # sns.distplot(df_all[df_all['eui'] < 40]['eui'])
            # print len(df_all[df_all['eui'] > 40])

            df_con = pd.concat([df_all, df_all2], ignore_index=True)
            g = sns.lmplot(x='Temperature_F', y='eui', hue=hue_str,
                           col='year', row='method',
                           hue_order=['week day', 'weekend day',
                                      'week night', 'weekend night'], 
                           palette='husl',
                           # palette='Paired',
                           size=5, aspect = 1.0, data=df_con, 
                           fit_reg=False)
                           # lowess=True, scatter_kws={'s':4})
            plt.ylabel(ylabel[measure_type])
            g.set(ylabel=ylabel[measure_type])
            if measure_type == 'electric':
                plt.ylim((0, 0.003 * 4))
            elif measure_type == 'gas':
                plt.ylim((0, 0.008 * 4))
                # plt.ylim((0, 0.04))
            min_time = df_all['Timestamp'].min()
            max_time = df_all['Timestamp'].max()

            print min_time, max_time
            # plt.title('{0} {1} Setback'.format(b,
            #                                    measure_type.title()))
            # plt.suptitle('{0} -- {1}'.format(min_time, max_time))
            title = '{0} {1} Setback\n{2} -- {3}'.format(b, measure_type.title(), min_time, max_time)
            plt.suptitle(title)
            plt.subplots_adjust(top=0.85)
        elif hue_str == 'cluster':
            cl.plot_cluster(df_all, b, s, measure_type, 0.18, 100)
            continue
        elif hue_str == 'piece':
            df_all = df_all[df_all[b] > 0]
            plot_piecewise(measure_type, df_all, b, s)
        else:
            g = sns.lmplot(x='Temperature_F', y=b, hue=hue_str, col=hue_str, col_wrap=col_wrap_dict[hue_str], size=3, data=df_all, fit_reg=False)
            g.set(ylim=(0, upper[measure_type]))
            g.set(ylabel=ylabel[measure_type])
        plt.gca().set_ylim(bottom=0)
        # plt.show()
        path = os.getcwd() + '/input/FY/interval/ion_0627/{0}_scatter/{1}_{2}_{3}'.format(measure_type, b, s, hue_str)
        P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight')
        plt.close()
    return
def fit_time(measure_type, region, season=None):
    conn = uo.connect('all')
    with conn:
        df_bs = pd.read_sql(
            'SELECT Building_Number, ICAO, eui_elec, eui_gas, year, month, ave FROM EUAS_monthly_weather',
            conn)
        df_region = pd.read_sql(
            'SELECT DISTINCT Building_Number, [Region_No.] FROM EUAS_monthly',
            conn)
    if measure_type == 'electric':
        good_set = gbs.get_energy_set('eui_elec')
    elif measure_type == 'gas':
        good_set = gbs.get_energy_set('gas')
    df_bs = pd.merge(df_bs, df_region, on='Building_Number', how='left')
    df_bs = df_bs[df_bs['Building_Number'].map(lambda x: x in good_set)]
    df_bs = df_bs[df_bs['Region_No.'] == str(region)]
    df_bs = df_bs[df_bs['eui_elec'].notnull()]
    df_bs = df_bs[df_bs['eui_gas'].notnull()]
    df_bs.sort_values(by=['Building_Number', 'year', 'month'],
                      ascending=False,
                      inplace=True)
    bs_pair = list(set(zip(df_bs['Building_Number'], df_bs['ICAO'])))
    df_bs['Timestamp'] = df_bs.apply(
        lambda r: '{}-{}'.format(int(r['year']), int(r['month'])), axis=1)
    sns.set_style("whitegrid")
    sns.set_context("talk", font_scale=1)
    ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'}
    col_dict = {'electric': 'eui_elec', 'gas': 'eui_gas'}
    print len(bs_pair)
    sns.set_style("whitegrid")
    # palette = sns.cubehelix_palette(len(bs_pair))
    palette = sns.color_palette('husl', len(bs_pair))
    sns.set_palette(palette)
    colors_rgb = [util.float2hex(x) for x in palette]
    sns.set_context("talk", font_scale=1)
    jsondir = os.getcwd(
    ) + '/plot_FY_weather/html/by_region/Region{}/piecewise_all/json/'.format(
        region)
    title = "Region {}".format(region)
    col = col_dict[measure_type]
    for i, (b, s) in enumerate(bs_pair):
        print b, s
        df = df_bs[df_bs['Building_Number'] == b]
        df = df.head(n=36)
        print df.head()
        points = df[col]
        min_time = df['Timestamp'].min()
        max_time = df['Timestamp'].max()
        bx = plt.axes()
        d0 = plot_piece(df,
                        bx,
                        title,
                        colors_rgb[i],
                        measure_type,
                        b,
                        s,
                        scatter=False,
                        annote=True,
                        jsondir=jsondir,
                        season=season)
    plt.xlabel('Temperature_F')
    # plt.show()
    if season is None:
        path = os.getcwd(
        ) + '/plot_FY_weather/html/by_region/Region{}/piecewise_all/{}.png'.format(
            region, measure_type)
    else:
        path = os.getcwd(
        ) + '/plot_FY_weather/html/by_region/Region{}/piecewise_all/{}_{}.png'.format(
            region, measure_type, season)
    P.savefig(path,
              dpi=my_dpi,
              figsize=(2000 / my_dpi, 500 / my_dpi),
              bbox_inches='tight')
    plt.close()
    return
import sqlite3
import pandas as pd
import numpy as np
import os
import geocoder
import time

import util_io as uo
homedir = os.getcwd() + '/csv_FY/'
# project_dir = '/media/yujiex/work/project/data/'

project_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + '/data/'

conn = uo.connect('interval_ion')
with conn:
    df1 = pd.read_sql('SELECT * FROM electric_id', conn)
    df2 = pd.read_sql('SELECT * FROM gas_id', conn)
df1['has_electric'] = 1
df2['has_gas'] = 1
df = pd.merge(df1, df2, on='id', how='outer')
df.rename(columns={'id': 'Building_Number'}, inplace=True)
conn.close()

conn = uo.connect('other_input')
with conn:
    df3 = pd.read_sql('SELECT * FROM Entire_GSA_Building_Portfolio_input',
                      conn)
df = pd.merge(df, df3, on='Building_Number', how='left')
df['in_facility'] = df.apply(
    lambda r: 1 if r['Building_Number'] == r['Facility_ID'] else np.nan,
    axis=1)
Exemple #58
0
def compare_interval_db(b):
    conn = uo.connect('ION data.db')
    with conn:
        df = read_sql('SELECT * FROM ION_electricity WHERE Building_Number = \'{0}\''.format(b))
    df.info()