def add_temperature_long(): conn = uo.connect('all') with conn: df = pd.read_sql('SELECT DISTINCT Building_Number, ICAO FROM EUAS_monthly_weather', conn) conn.close() # print df.head() conn = uo.connect('interval_ion') with conn: ids_elec = pd.read_sql('SELECT * FROM electric_id', conn)['id'] ids_gas = pd.read_sql('SELECT * FROM gas_id', conn)['id'] df_id_elec = df[df['Building_Number'].isin(ids_elec)] df_id_gas = df[df['Building_Number'].isin(ids_gas)] stations = np.union1d(df_id_elec['ICAO'].unique(), df_id_gas['ICAO'].unique()) with conn: df_id_elec.to_sql('electric_id_station', conn, if_exists='replace') df_id_gas.to_sql('gas_id_station', conn, if_exists='replace') conn_w = uo.connect('weather_hourly_utc') print len(stations) for s in stations: with conn_w: ds = pd.read_sql('SELECT * FROM {0} WHERE Timestamp between \'2011-09-01\' and \'2016-06-28\''.format(s), conn_w) print s with conn: ds.to_sql(s, conn, if_exists='replace') conn_w.close() conn.close()
def compare_id(): conn = uo.connect('interval_ion') with conn: id_elec = pd.read_sql('SELECT * FROM electric_id', conn)['id'] id_gas = pd.read_sql('SELECT * FROM gas_id', conn)['id'] conn2 = uo.connect('all') with conn2: id_gsalink = pd.read_sql( 'SELECT * FROM EUAS_ecm WHERE high_level_ECM = \'GSALink\'', conn2)['Building_Number'] print len(set(id_gsalink)) print len(set(id_elec).intersection(set(id_gsalink))) print len(set(id_gas).intersection(set(id_gsalink))) print len( set(id_elec).intersection(set(id_gas)).intersection(set(id_gsalink)))
def change_area(): conn = uo.connect('all') with conn: df = pd.read_sql( 'SELECT Building_Number, [Gross_Sq.Ft] FROM EUAS_area', conn) df_max = df.groupby('Building_Number').max() df_min = df.groupby('Building_Number').min() df_all = pd.merge(df_max, df_min, how='inner', left_index=True, right_index=True, suffixes=['_max', '_min']) df_all['diff'] = df_all['Gross_Sq.Ft_max'] - \ df_all['Gross_Sq.Ft_min'] df_all['percent_diff'] = df_all.apply( lambda r: np.nan if r['Gross_Sq.Ft_max'] == 0 else (1 - r['Gross_Sq.Ft_min'] / r['Gross_Sq.Ft_max']) * 100, axis=1) df_large = df_all[df_all['percent_diff'] > 10] print len(df_large) df_large.drop('diff', axis=1, inplace=True) print df_large.head() df_large.to_csv(homedir + 'question/change_area.csv', index=True) return
def facility_vs_building_set(s): print s conn = uo.connect('all') if s == 'AI': ids = gbs.get_cat_set(['A', 'I'], conn) elif s == 'covered': ids = gbs.get_covered_set() df_f = pd.read_csv( os.getcwd() + '/input/FY/covered/Covered_Facilities_All Energy mmBTUs_FY14_EISA07Sec432_input.csv' ) facility_eisa = set(df_f['Facility_Number'].tolist()) facility_eisa = [x[:8] for x in facility_eisa if type(x) != float] f_ids = [x for x in ids if '0000' in x] # for x in sorted(f_ids): # print x b_ids = [x for x in ids if not '0000' in x] print 'total {0}, facility {1}, building {2}'.format( len(ids), len(f_ids), len(b_ids)) bf_ids = ['{0}0000{1}'.format(x[:2], x[-2:]) for x in b_ids] print len(common) common = (set(bf_ids).intersection(f_ids)) for y in common: print y print[x for x in b_ids if '{0}0000{1}'.format(x[:2], x[-2:]) == y] if s == 'covered': print 'eisa facility', len(facility_eisa) print 'common ids from eisa', len( set(f_ids).intersection(facility_eisa)) print 'different ids from eisa', (set(f_ids).difference(facility_eisa)) common = (set(f_ids).difference(facility_eisa)) for y in common: print y print[x for x in b_ids if '{0}0000{1}'.format(x[:2], x[-2:]) == y]
def missing_gsalink(): conn1 = uo.connect('all') with conn1: df1 = pd.read_sql('SELECT DISTINCT Building_Number FROM EUAS_monthly', conn1) conn2 = uo.connect('other_input') with conn2: df2 = pd.read_sql( 'SELECT DISTINCT Building_ID AS Building_Number FROM GSAlink_Buildings_First_55_Opiton_26_Start_Stop_Dates', conn2) # print len(df2) missing = set(df2['Building_Number'].tolist()).difference( df1['Building_Number'].tolist()) print len(missing) print print missing
def remove_outliers(measure_type): value_lb_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'} col = value_lb_dict[measure_type] conn = uo.connect('interval_ion') dfs = [] with conn: df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) # bs_pair = [x for x in bs_pair if x[0] == 'AL0039AB'] for i, (b, s) in enumerate(bs_pair): print i, b with conn: df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, b), conn) # df = df.head(n = 5000) # df.info() points = df[col] outliers = show_outlier(points, b, 'upper', measure_type, 1.5) # outliers = show_outlier(points, b, 'pos_roll', measure_type, 1.5) # mild, outliers = show_outlier(points, b, 'box', measure_type, 1.5) df['outlier'] = outliers print len([x for x in outliers if x]) dfs.append(df) df_all = pd.concat(dfs, ignore_index=True) print df_all.head() with conn: df_all.to_sql('{0}_outlier_tag'.format(measure_type), conn, if_exists='replace') return
def summary_long(measure_type): conn = uo.connect('interval_ion') with conn: df_id = pd.read_sql('SELECT * FROM {0}_id'.format(measure_type), conn) lines = [] lines.append('Building_Number,neg_count,min,max,median,75percent,min_time,max_time,expect_count,actural_count,missing_count') ids = df_id['id'].tolist() for name in ids: with conn: df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, name), conn) df['Timestamp'] = pd.to_datetime(df['Timestamp']) cols = list(df) cols.remove('Timestamp') cols.remove('Building_Number') c = cols[0] temp = df.dropna(subset=[c]) neg = temp[c].tolist() neg = [x for x in neg if x < 0] min_time = temp['Timestamp'].min() min_time_str = min_time.strftime('%Y-%m-%d %H:%M:%S') max_time = temp['Timestamp'].max() max_time_str = max_time.strftime('%Y-%m-%d %H:%M:%S') expect_count = int(pd.to_timedelta((max_time - min_time)) / np.timedelta64(15,'m')) + 1 actural_count = len(temp) missing_count = expect_count - actural_count mini = temp[c].min() maxi = temp[c].max() median = temp[c].median() q3 = temp[c].quantile(0.75) print (','.join([name, str(int(len(neg))), str(mini), str(maxi), str(median), str(q3), min_time_str, max_time_str, str(expect_count), str(actural_count), str(missing_count)])) lines.append(','.join([name, str(int(len(neg))), str(mini), str(maxi), str(median), str(q3), min_time_str, max_time_str, str(expect_count), str(actural_count), str(missing_count)])) indir = os.getcwd() + '/input/FY/interval/ion_0627/summary_long/' with open (indir + 'summary_{0}.csv'.format(measure_type), 'w+') as wt: wt.write('\n'.join(lines))
def remove_outliers(measure_type): value_lb_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'} col = value_lb_dict[measure_type] conn = uo.connect('interval_ion') dfs = [] with conn: # df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) df_id = pd.read_sql('SELECT * FROM {0}_id'.format(measure_type), conn) # bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) # bs_pair = [x for x in bs_pair if x[0] == 'AL0039AB'] ids = df_id['id'] for i, b in enumerate(ids): print i, b with conn: df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, b), conn) print df.head() # df = df.head(n = 5000) # df.info() points = df[col] outliers = show_outlier(points, b, 'upper', measure_type, 1.5) # outliers = show_outlier(points, b, 'pos_roll', measure_type, 1.5) # mild, outliers = show_outlier(points, b, 'box', measure_type, 1.5) df['outlier'] = outliers print len([x for x in outliers if x]) dfs.append(df) df_all = pd.concat(dfs, ignore_index=True) print df_all.head() with conn: df_all.to_sql('{0}_outlier_tag'.format(measure_type), conn, if_exists='replace') return
def build_energy_temperature(measure_type): conn = uo.connect('interval_ion') with conn: df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) df_area = pd.read_sql('SELECT * FROM area', conn) df_tz = pd.read_sql('SELECT Building_Number, rawOffset FROM EUAS_timezone', conn) df_tz.set_index('Building_Number', inplace=True) df_area.set_index('Building_Number', inplace=True) bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) value_lb_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'} multiplier_dict = {'electric': 3.412, 'gas': 1.026} col = value_lb_dict[measure_type] m = multiplier_dict[measure_type] dfs = [] # bs_pair = [x for x in bs_pair if x[0] == 'OR0033PE'] for b, s in bs_pair: print b, s try: area = df_area.ix[b, 'Gross_Sq.Ft'] except KeyError: print 'No area found' continue offset = df_tz.loc[b, 'rawOffset'] df = join_interval(offset, b, s, area, col, m, measure_type, conn) if len(df) == 0: continue dfs.append(df) df['Building_Number'] = b df_all = pd.concat(dfs) with conn: df_all.to_sql('{0}_wtemp'.format(measure_type), conn, if_exists='replace') conn.close() print 'end'
def facility_vs_building_set(s): print s conn = uo.connect('all') if s == 'AI': ids = gbs.get_cat_set(['A', 'I'], conn) elif s == 'covered': ids = gbs.get_covered_set() df_f = pd.read_csv(os.getcwd() + '/input/FY/covered/Covered_Facilities_All Energy mmBTUs_FY14_EISA07Sec432_input.csv') facility_eisa = set(df_f['Facility_Number'].tolist()) facility_eisa = [x[:8] for x in facility_eisa if type(x) != float] f_ids = [x for x in ids if '0000' in x] # for x in sorted(f_ids): # print x b_ids = [x for x in ids if not '0000' in x] print 'total {0}, facility {1}, building {2}'.format(len(ids), len(f_ids), len(b_ids)) bf_ids = ['{0}0000{1}'.format(x[:2], x[-2:]) for x in b_ids] print len(common) common = (set(bf_ids).intersection(f_ids)) for y in common: print y print [x for x in b_ids if '{0}0000{1}'.format(x[:2], x[-2:]) == y] if s == 'covered': print 'eisa facility', len(facility_eisa) print 'common ids from eisa', len(set(f_ids).intersection(facility_eisa)) print 'different ids from eisa', (set(f_ids).difference(facility_eisa)) common = (set(f_ids).difference(facility_eisa)) for y in common: print y print [x for x in b_ids if '{0}0000{1}'.format(x[:2], x[-2:]) == y]
def check_interval(filename): df = pd.read_csv(inputdir + filename) df.rename(columns=lambda x: x[:8] if x != 'Timestamp' else x, inplace=True) df.dropna(axis=1, how='all', inplace=True) df['Timestamp'] = pd.to_datetime(df['Timestamp']) df.set_index(pd.DatetimeIndex(df['Timestamp']), inplace=True) # df.info() df_re = df.resample('M', how='sum') cols = list(df_re) df_re.reset_index(inplace=True) df_long = pd.melt(df_re, id_vars='index', value_vars=cols) # print # print df_long.head() df_long.rename(columns={'index':'Timestamp', 'variable': 'Building_Number', 'value': 'Electricity_(KWH)'}, inplace=True) df_long['month'] = df_long['Timestamp'].map(lambda x: x.month) df_long['year'] = df_long['Timestamp'].map(lambda x: x.year) col_str = ','.join(['\'{0}\''.format(x) for x in cols]) conn = uo.connect('all') with conn: df = pd.read_sql('SELECT Building_Number, year, month, [Electricity_(KWH)] FROM EUAS_monthly WHERE Building_Number IN ({0}) AND year = \'2015\''.format(col_str), conn) # print df.head() df_long.drop('Timestamp', axis=1, inplace=True) df_all = pd.merge(df, df_long, how='left', on=['Building_Number', 'year', 'month'], suffixes=['_EUAS', '_ION']) df_all['ratio'] = df_all['Electricity_(KWH)_ION']/df_all['Electricity_(KWH)_EUAS'].map(lambda x: round(x, 3)) df_all['percent_diff'] = df_all['ratio'].map(lambda x: abs(1 - x) * 100.0) # print df_all.head() return df_all
def summary_wide(measure_type): conn = uo.connect('interval_ion_single') with conn: df = pd.read_sql('SELECT * FROM {0}'.format(measure_type), conn) conn.close() df['Timestamp'] = pd.to_datetime(df['Timestamp']) cols = list(df) cols.remove('Timestamp') lines = [] lines.append('Building_Number,neg_count,min,max,median,min_time,max_time,expect_count,actural_count,missing_count') for c in cols: temp = df[['Timestamp', c]].copy() temp.dropna(subset=[c], inplace=True) neg = temp[c].tolist() neg = [x for x in neg if x < 0] min_time = temp['Timestamp'].min() min_time_str = min_time.strftime('%Y-%m-%d %H:%M:%S') max_time = temp['Timestamp'].max() max_time_str = max_time.strftime('%Y-%m-%d %H:%M:%S') expect_count = int(pd.to_timedelta((max_time - min_time)) / np.timedelta64(15,'m')) + 1 actural_count = len(temp) missing_count = expect_count - actural_count mini = temp[c].min() maxi = temp[c].max() median = temp[c].median() print (','.join([c, str(int(len(neg))), str(mini), str(maxi), str(median), min_time_str, max_time_str, str(expect_count), str(actural_count), str(missing_count)])) lines.append(','.join([c, str(int(len(neg))), str(mini), str(maxi), str(median), min_time_str, max_time_str, str(expect_count), str(actural_count), str(missing_count)])) indir = os.getcwd() + '/input/FY/interval/ion_0627/summary/' with open (indir + 'summary_{0}.csv'.format(measure_type), 'w+') as wt: wt.write('\n'.join(lines))
def compare_interval_db(b): conn = uo.connect('ION data.db') with conn: df = read_sql( 'SELECT * FROM ION_electricity WHERE Building_Number = \'{0}\''. format(b)) df.info()
def read_ion(measure_type, prefix, value_label): indir = os.getcwd() + '/input/FY/interval/ion_0627/{0}/'.format(measure_type) df1 = read_by_region(indir, prefix + ' 1_2_3_4*', value_label, True) # temp = pd.to_datetime(df_melt['Timestamp'][:5]) df2 = read_by_region(indir, prefix + ' 5_6*', value_label, True) # print len(df2), '222' df3 = read_by_region(indir, prefix + ' 7_to_11*', value_label, True) # print len(df3), '333' df_melt = pd.concat([df1, df2, df3], ignore_index=True) print len(df_melt) df_melt.dropna(subset=[value_label], inplace=True) ids = df_melt['Building_Number'].unique() df_id = pd.DataFrame({'id': ids}) print len(df_melt) conn = uo.connect('interval_ion') with conn: c = conn.cursor() c.execute("DROP TABLE IF EXISTS {0}".format(measure_type)) df_melt.to_sql(measure_type, conn, if_exist='replace') df_id.to_sql('{0}_id'.format(measure_type), conn, if_exist='replace') conn.close() print 'end'
def table_for_robust_set(): conn = uo.connect('all') study_set = gbs.get_energy_set('eui').intersection(gbs.get_cat_set(['A', 'I'], conn)) df = pd.read_csv(os.getcwd() + '/plot_FY_weather/html/table/action_saving.csv') df = df[df['Building_Number'].isin(study_set)] df.sort('Building_Number', inplace=True) df.to_csv(os.getcwd() + '/plot_FY_weather/html/table/action_saving_robustset.csv', index=False) return
def check_hourly(b, measure_type): conn = uo.connect('interval_ion') euas_dict = {'electric': 'Electricity_(KWH)', 'gas': 'Gas_(Cubic_Ft)'} ion_dict = {'electric': 'Electric_(KWH)', 'gas':'Gas_(CubicFeet)'} with conn: df1 = pd.read_sql('SELECT * FROM {1} WHERE Building_Number = \'{0}\''.format(b, measure_type), conn) conn.close() df1['Date'] = pd.DatetimeIndex(pd.to_datetime(df1['Timestamp'])) df1.set_index(df1['Date'], inplace=True) df1_re = df1.resample('M', 'sum') df1_re['month'] = df1_re.index.month df1_re['year'] = df1_re.index.year df1_re.reset_index(inplace=True) conn = uo.connect('all') with conn: df2 = pd.read_sql('SELECT Building_Number, year, month, [{1}] FROM EUAS_monthly WHERE Building_Number = \'{0}\''.format(b, euas_dict[measure_type]), conn) if len(df1) == 0 or len(df2) == 0: return df_all = pd.merge(df1_re, df2, on=['year', 'month'], how='left') df_all.set_index(pd.DatetimeIndex(pd.to_datetime(df_all['Date'])), inplace=True) df_all.drop('Date', axis=1, inplace=True) df_all.rename(columns={ion_dict[measure_type]: 'ION', euas_dict[measure_type]: 'EUAS'}, inplace=True) df_inn = pd.merge(df1_re, df2, on=['year', 'month'], how='inner') df_inn.set_index(pd.DatetimeIndex(pd.to_datetime(df_inn['Date'])), inplace=True) df_inn.drop('Date', axis=1, inplace=True) df_inn.rename(columns={ion_dict[measure_type]: 'ION', euas_dict[measure_type]: 'EUAS'}, inplace=True) df_inn[b] = df_inn['ION']/df_inn['EUAS'] # df_inn.to_csv(homedir + 'temp/{0}_{1}_ion_euas.csv'.format(b, measure_type)) # temp check the data dsc = df_inn[[b]].describe().transpose() dsc['overall'] = df_inn['ION'].sum()/df_inn['EUAS'].sum() sns.set_context("talk", font_scale=1.0) sns.set_palette(sns.color_palette('Set2')) line1, = plt.plot(df_all.index, df_all['ION'], '-o') line2, = plt.plot(df_all.index, df_all['EUAS'], '-o') plt.legend([line1, line2], ['ION', 'EUAS'], loc='center left', bbox_to_anchor=(1, 0.5)) plt.title('{0} {1} ION vs EUAS monthly'.format(b, measure_type), fontsize=30) if measure_type == 'electric': plt.ylabel('KWH') else: plt.ylabel('Cubic Feet') # plt.show() path = os.getcwd() + '/input/FY/interval/ion_0627/cmp_euas/{0}_{1}.png'.format(b, measure_type) P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight') plt.close() return dsc
def fit_time(measure_type, occtime, season=None): conn = uo.connect('interval_ion') with conn: df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) df_area = pd.read_sql('SELECT * FROM area', conn) df_tz = pd.read_sql('SELECT Building_Number, rawOffset FROM EUAS_timezone', conn) df_tz.set_index('Building_Number', inplace=True) df_area.set_index('Building_Number', inplace=True) bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) sns.set_style("whitegrid") sns.set_context("talk", font_scale=1) value_lb_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'} multiplier_dict = {'electric': 3.412, 'gas': 1.026} col = value_lb_dict[measure_type] m = multiplier_dict[measure_type] ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'} print len(bs_pair) sns.set_style("whitegrid") # palette = sns.cubehelix_palette(len(bs_pair)) palette = sns.color_palette('husl', len(bs_pair)) sns.set_palette(palette) colors_rgb = [util.float2hex(x) for x in palette] sns.set_context("talk", font_scale=1) jsondir = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/json_{0}/'.format(occtime) # csvdir = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/csv/' for i, (b, s) in enumerate(bs_pair): print b, s try: area = df_area.ix[b, 'Gross_Sq.Ft'] except KeyError: print 'No area found' continue offset = df_tz.loc[b, 'rawOffset'] df = join_interval(offset, b, s, area, col, m, measure_type, conn, season=season) df.to_csv(homedir + 'temp/{0}.csv'.format(b)) df = df[df[col] >= 0] points = df[col] outliers = show_outlier(points, b, 'upper', measure_type, 1.5) df['outlier'] = outliers df = df[~np.array(outliers)] df['status_week_day_night'] = \ df.apply(lambda r: util.get_status(r['hour'], r['day']), axis=1) min_time = df['Timestamp'].min() max_time = df['Timestamp'].max() gr = df.groupby('status_week_day_night') bx = plt.axes() d0 = plot_piece(gr, bx, occtime, colors_rgb[i], measure_type, b, s, scatter=False, annote=True, jsondir=jsondir, season=season) plt.xlabel('Temperature_F') # plt.show() if season is None: path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/{0}_{1}.png'.format(measure_type, occtime) else: path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/{0}_{1}_{2}.png'.format(measure_type, occtime, season) P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight') shutil.copy(path, path.replace('input/FY/interval/ion_0627/piecewise_all', 'plot_FY_weather/html/interval/lean/all')) plt.close() return
def ecm_program_no_date(): conn = uo.connect('all') with conn: df = pd.read_sql('SELECT * FROM EUAS_ecm_program', conn) df = df[df['ECM_program'].notnull()] df.drop_duplicates(cols=['Building_Number', 'ECM_program'], inplace=True) df.rename(columns={'ECM_program': 'energy_program'}, inplace=True) df.to_csv(homedir + 'question/program_date.csv', index=False) return
def plot_trend_fromdb(b, s, breakpoints): conn = uo.connect('all') with conn: df_all = pd.read_sql( 'SELECT * FROM EUAS_monthly_weather WHERE Building_Number = \'{0}\'' .format(b), conn) conn.close() df_all.sort(['year', 'month'], inplace=True) df_all = df_all[['year', 'month', 'eui_elec', 'eui_gas', 'hdd65', 'cdd65']] df_all = df_all[(df_all['year'] < 2016) & (df_all['year'] > 2002)] print type(df_all['eui_gas'].tolist()[0]) byyear = df_all.groupby(['year']).sum() byyear.index = byyear.index.map(lambda x: datetime.datetime(int(x), 1, 1)) sns.set_style("whitegrid") sns.set_context("talk", font_scale=1) bx = plt.axes() gas_line_color = '#DE4A50' elec_line_color = '#429CD5' hdd = byyear['hdd65'].tolist() hdd = [int(round(x, 0)) for x in hdd] cdd = byyear['cdd65'].tolist() cdd = [int(round(x, 0)) for x in cdd] bx = plt.axes() line1, = plt.plot(byyear.index, byyear['eui_gas'], ls='-', lw=2, marker='o', color=gas_line_color) line2, = plt.plot(byyear.index, byyear['eui_elec'], ls='-', lw=2, marker='o', color=elec_line_color) for m, n, d in zip(byyear.index, byyear['eui_gas'], hdd): bx.annotate('HDD\n{0}'.format(d), xy=(m, n)) for m, n, d in zip(byyear.index, byyear['eui_elec'], cdd): bx.annotate('CDD\n{0}'.format(d), xy=(m, n)) for bp in breakpoints: x = pd.to_datetime([pd.to_datetime(bp)] * 2) y = bx.get_ylim() plt.plot(x, y, ls='--', lw=2, color='gray') plt.legend([line1, line2], ['Gas', 'Electric'], loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 10}) plt.title("Electric EUI and Gas EUI Trend") plt.ylabel("[kBtu/sq.ft/year]") plt.xlabel("Calendar Year") P.savefig( os.getcwd() + '/plot_FY_weather/html/single_building/trend/{0}_{1}_year.png'.format( b, s), dpi=70) plt.close() return
def add_area(): conn = uo.connect('all') with conn: df_area = pd.read_sql('SELECT DISTINCT Building_Number, [Gross_Sq.Ft] FROM EUAS_area WHERE Fiscal_Year > 2010', conn) conn.close() conn = uo.connect('interval_ion') with conn: ids_elec = pd.read_sql('SELECT * FROM electric_id', conn)['id'] ids_gas = pd.read_sql('SELECT * FROM gas_id', conn)['id'] ids = np.union1d(ids_elec, ids_gas) df_area = df_area[df_area['Building_Number'].isin(ids)] print len(df_area) print df_area.head(n = 20) change_area = df_area.groupby('Building_Number').filter(lambda x: len(x) > 2) assert(len(change_area) == 0) with conn: df_area.to_sql('area', conn, if_exists='replace') conn.close()
def count_db_entries(db): conn = uo.connect(db) c = conn.cursor() tables = util.get_list_tables(c) acc = 0 for t in tables: c.execute("SELECT COUNT (*) FROM {0};".format(t)) acc += c.fetchone()[0] print acc return
def check_0711(): conn = uo.connect('interval_ion') # check_match(conn, 'electric') check_match(conn, 'gas') conn.close() # keys = ['mean', 'std', 'min', '25%', '50%', '75%', 'max', 'overall'] # format_dict = {k: lambda x: '{0:.2f}'.format(x) for k in keys} # uo.csv2html(os.getcwd() + '/input/FY/interval/ion_0627/cmp_euas/electric_ratio.csv', {'Unnamed: 0': 'Building_Number'}, format_dict) # uo.csv2html(os.getcwd() + '/input/FY/interval/ion_0627/cmp_euas/gas_ratio.csv', {'Unnamed: 0': 'Building_Number'}, format_dict) return
def num_invest(): conn = uo.connect('all') studyset = gbs.get_650_set(conn) df1 = pd.DataFrame({'Building_Number': list(studyset)}) with conn: df2 = pd.read_sql('SELECT * FROM EUAS_invest_nona', conn) df = pd.merge(df1, df2, on='Building_Number', how='left') df.groupby('Building_Number').count().to_csv(homedir + 'temp/invest_cnt_all.csv') cnt = df.groupby('Building_Number').count() print cnt.groupby('investment').count()
def missing_area(): conn = uo.connect('interval_ion') with conn: df_area = pd.read_sql('SELECT * FROM area', conn) id_elec = pd.read_sql('SELECT id FROM electric_id', conn) id_gas = pd.read_sql('SELECT id FROM gas_id', conn) ids = np.union1d(id_elec['id'], id_gas['id']) missing = np.setdiff1d(ids, df_area['Building_Number']) print for x in missing: print x
def table_for_robust_set(): conn = uo.connect('all') study_set = gbs.get_energy_set('eui').intersection( gbs.get_cat_set(['A', 'I'], conn)) df = pd.read_csv(os.getcwd() + '/plot_FY_weather/html/table/action_saving.csv') df = df[df['Building_Number'].isin(study_set)] df.sort('Building_Number', inplace=True) df.to_csv(os.getcwd() + '/plot_FY_weather/html/table/action_saving_robustset.csv', index=False) return
def fit_time(measure_type, occtime): conn = uo.connect('interval_ion') with conn: df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) df_area = pd.read_sql('SELECT * FROM area', conn) df_area.set_index('Building_Number', inplace=True) bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) sns.set_style("whitegrid") sns.set_context("talk", font_scale=1) value_lb_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'} multiplier_dict = {'electric': 3.412, 'gas': 1.026} col = value_lb_dict[measure_type] m = multiplier_dict[measure_type] ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'} print len(bs_pair) sns.set_style("whitegrid") # palette = sns.cubehelix_palette(len(bs_pair)) palette = sns.color_palette('husl', len(bs_pair)) sns.set_palette(palette) colors_rgb = [util.float2hex(x) for x in palette] sns.set_context("talk", font_scale=1) jsondir = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/json_{0}/'.format(occtime) # csvdir = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/csv/' for i, (b, s) in enumerate(bs_pair)[:5]: print b, s try: area = df_area.ix[b, 'Gross_Sq.Ft'] except KeyError: print 'No area found' continue df = join_interval(b, s, area, col, m, measure_type, conn) # df.to_csv(homedir + 'temp/{0}.csv'.format(b)) df = df[df[col] >= 0] points = df[col] outliers = show_outlier(points, b, 'upper', measure_type, 5) df['outlier'] = outliers df = df[~np.array(outliers)] df['status_week_day_night'] = \ df.apply(lambda r: util.get_status(r['hour'], r['day']), axis=1) min_time = df['Timestamp'].min() max_time = df['Timestamp'].max() gr = df.groupby('status_week_day_night') bx = plt.axes() d0 = plot_piece(gr, bx, occtime, colors_rgb[i], measure_type, b, s, scatter=False, annote=True, jsondir=jsondir) plt.xlabel('Temperature_F') # plt.show() path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise_all/{0}_{1}.png'.format(measure_type, occtime) P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight') shutil.copy(path, path.replace('input/FY/interval/ion_0627/piecewise_all', 'plot_FY_weather/html/interval/lean/all')) plt.close() return
def add_temperature_wide(): conn = uo.connect('all') with conn: df = pd.read_sql('SELECT DISTINCT Building_Number, ICAO FROM EUAS_monthly_weather', conn) conn.close() # print df.head() conn = uo.connect('interval_ion_single') with conn: df_elec = pd.read_sql('SELECT * FROM electric', conn) df_gas = pd.read_sql('SELECT * FROM gas', conn) ids_elec = list(df_elec) ids_gas = list(df_gas) ids_elec.remove('Timestamp') ids_gas.remove('Timestamp') df_id_elec = df[df['Building_Number'].isin(ids_elec)] df_id_gas = df[df['Building_Number'].isin(ids_gas)] stations = np.intersect1d(df_id_elec['ICAO'].unique(), df_id_elec['ICAO'].unique()) with conn: df_id_elec.to_sql('electric_id_station', conn, if_exists='replace') df_id_gas.to_sql('gas_id_station', conn, if_exists='replace') conn_w = uo.connect('weather_hourly_utc') print len(stations)
def study_set_plot(): conn = uo.connect('all') with conn: # df1 = pd.read_sql('SELECT DISTINCT Building_Number, Fiscal_Year FROM EUAS_monthly', conn) df1 = pd.read_sql( 'SELECT DISTINCT Building_Number, Fiscal_Year FROM eui_by_fy', conn) df2 = pd.read_sql('SELECT Building_Number, Cat FROM EUAS_category', conn) meter_set = gbs.get_action_set('high_level_ECM', ['Advanced Metering']) df = pd.merge(df1, df2, on='Building_Number', how='left') df = df[df['Fiscal_Year'] > 2006] df = df[df['Fiscal_Year'] < 2016] df3 = df.groupby('Building_Number').filter(lambda x: len(x) > 5) ai_set = gbs.get_cat_set(['A', 'I'], conn) # invest = gbs.get_invest_set()[-1] invest = gbs.get_ecm_set() all_building = set(df3['Building_Number'].tolist()) print 'all building > 5 years of data: {0}'.format(len(all_building)) print 'all building > 5 years of data + ecm: {0}'.format( len(all_building.intersection(invest))) print 'all building > 5 years of data + meter: {0}'.format( len(all_building.intersection(meter_set))) df4 = df[df['Cat'].isin( ['A', 'I'])].groupby('Building_Number').filter(lambda x: len(x) > 5) ai_building = set(df4['Building_Number'].tolist()) print 'A + I building > 5 years of data: {0}'.format(len(ai_building)) print 'A + I building > 5 years of data + ecm: {0}'.format( len(ai_building.intersection(invest))) print 'A + I building > 5 years of data + meter: {0}'.format( len(ai_building.intersection(meter_set))) print 'elec ', len(gbs.get_energy_set('elec').intersection(ai_set)) print 'elec + ecm', len( gbs.get_energy_set('elec').intersection(ai_set).intersection(invest)) print 'elec + meter', len( gbs.get_energy_set('elec').intersection(ai_set).intersection( meter_set)) print 'gas ', len(gbs.get_energy_set('gas').intersection(ai_set)) print 'gas + ecm', len( gbs.get_energy_set('gas').intersection(ai_set).intersection(invest)) print 'gas + meter', len( gbs.get_energy_set('gas').intersection(ai_set).intersection(meter_set)) print 'eui', len(gbs.get_energy_set('eui').intersection(ai_set)) print 'eui + ecm', len( gbs.get_energy_set('eui').intersection(ai_set).intersection(invest)) print 'eui + meter', len( gbs.get_energy_set('eui').intersection(ai_set).intersection(meter_set)) return
def ion_gsalink_time(): df = pd.read_csv(os.getcwd() + \ '/input/FY/interval/ion_0627/summary_long/summary_electric.csv') df = df[['Building_Number', 'min_time']] conn = uo.connect('other_input') with conn: df_gsalink = pd.read_sql('SELECT Building_ID as Building_Number, Rollout_Date as GSALink_start_time FROM GSAlink_Buildings_First_55_Opiton_26_Start_Stop_Dates', conn) df_all = pd.merge(df, df_gsalink, on='Building_Number', how='left') df_all.rename(columns={'min_time': 'ION_start_time'}, inplace=True) df_all['GSALink_start_time'] = df_all['GSALink_start_time'].map(lambda x: np.nan if type(x) == float else datetime.strptime(x, '%Y/%m/%d').strftime('%Y-%m-%d')) df_all['ION_start_time'] = df_all['ION_start_time'].map(lambda x: np.nan if type(x) == float else datetime.strptime(x, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d')) df_all['days_diff'] = (pd.to_datetime(df_all['GSALink_start_time']) - pd.to_datetime(df_all['ION_start_time']))/np.timedelta64(1, 'D') df_all.to_csv(os.getcwd() + '/input/FY/interval/ion_0627/ion_gsalink_start.csv') print len(df_all[df_all['days_diff'] > 100]) return
def plot_trend_per_dd_fromdb(b, s, breakpoints): conn = uo.connect('all') with conn: df_all = pd.read_sql('SELECT * FROM EUAS_monthly_weather WHERE Building_Number = \'{0}\''.format(b), conn) conn.close() df_all.sort(['year', 'month'], inplace=True) df_all = df_all[['year', 'month', 'eui_elec', 'eui_gas', 'hdd65', 'cdd65']] df_all = df_all[(df_all['year'] < 2016) & (df_all['year'] > 2002)] df_agg = df_all.groupby('year').sum() df_agg.drop('month', axis=1, inplace=True) df_agg.reset_index(inplace=True) df_agg['Date'] = df_agg.apply(lambda r: datetime.datetime(int(r['year']), 1, 1) if not np.isnan(r['year']) else np.nan, axis=1) df_agg['eui_gas_perdd'] = df_agg.apply(lambda r: r['eui_gas'] / r['hdd65'] if r['hdd65'] > 0 else np.nan, axis=1) df_agg['eui_elec_perdd'] = df_agg.apply(lambda r: r['eui_elec'] / r['cdd65'] if r['cdd65'] > 0 else np.nan, axis=1) df_agg.set_index('Date', inplace=True) sns.set_style("whitegrid") sns.set_context("talk", font_scale=1) bx = plt.axes() gas_line_color = '#DE4A50' elec_line_color = '#429CD5' ylimit = max(df_agg['eui_gas_perdd'].max(), df_agg['eui_elec_perdd'].max()) * 1.1 line1, = plt.plot(df_agg.index, df_agg['eui_gas_perdd'], ls='-', lw=2, marker='o', color=gas_line_color) line2, = plt.plot(df_agg.index, df_agg['eui_elec_perdd'], ls='-', lw=2, marker='o', color=elec_line_color) hdd = df_agg['hdd65'].tolist() hdd = [int(round(x, 0)) for x in hdd] cdd = df_agg['cdd65'].tolist() cdd = [int(round(x, 0)) for x in cdd] for m, n, d in zip(df_agg.index, df_agg['eui_gas_perdd'], hdd): bx.annotate('HDD\n{0}'.format(d), xy=(m, n)) for m, n, d in zip(df_agg.index, df_agg['eui_elec_perdd'], cdd): bx.annotate('CDD\n{0}'.format(d), xy=(m, n)) for bp in breakpoints: x = pd.to_datetime([pd.to_datetime(bp)] * 2) plt.plot(x, [0, ylimit], ls='--', lw=2, color='gray') plt.legend([line1, line2], ['Gas', 'Electric'], loc='center left', bbox_to_anchor=(1, 0.5), prop={'size':10}) plt.ylim((0, ylimit)) plt.title("Electric EUI per degree day (65F) and Gas EUI per degree day (65F) Trend") plt.ylabel("[kBtu/(sq.ft*year*degree day(65F)]") plt.xlabel("Calendar Year") # plt.show() P.savefig(os.getcwd() + '/plot_FY_weather/html/single_building/trend/{0}_{1}_year_perdd.png'.format(b, s), dpi = 70) plt.close() return
def change_area(): conn = uo.connect('all') with conn: df = pd.read_sql('SELECT Building_Number, [Gross_Sq.Ft] FROM EUAS_area', conn) df_max = df.groupby('Building_Number').max() df_min = df.groupby('Building_Number').min() df_all = pd.merge(df_max, df_min, how='inner', left_index=True, right_index=True, suffixes=['_max', '_min']) df_all['diff'] = df_all['Gross_Sq.Ft_max'] - \ df_all['Gross_Sq.Ft_min'] df_all['percent_diff'] = df_all.apply(lambda r: np.nan if r['Gross_Sq.Ft_max'] == 0 else (1 - r['Gross_Sq.Ft_min']/r['Gross_Sq.Ft_max']) * 100, axis=1) df_large = df_all[df_all['percent_diff'] > 10] print len(df_large) df_large.drop('diff', axis=1, inplace=True) print df_large.head() df_large.to_csv(homedir + 'question/change_area.csv', index=True) return
def plot_lean_by_reion(region): conn = uo.connect('all') with conn: df_region = pd.read_sql( 'SELECT DISTINCT Building_Number, [Region_No.] FROM EUAS_monthly', conn) df_bs = pd.read_sql('SELECT * FROM EUAS_monthly_weather', conn) print len(df_bs) df_bs = df_bs.merge(df_region, on='Building_Number', how='left') df_bs = df_bs[df_bs['Region_No.'] == '9'] print len(df_bs) bs_dict = dict(zip(df_bs['Building_Number'], df_bs['ICAO'])) for b in bs_dict: s = bs_dict[b] result = ltm.lean_temperature_fromdb(b, s, 2, timerange=None) if result == None: result = (None, None, None)
def invest_cnt(): conn = uo.connect('all') with conn: df = pd.read_sql( 'SELECT DISTINCT Building_Number, high_level_ECM, detail_level_ECM FROM EUAS_ecm WHERE detail_level_ECM != \'GSALink\'', conn) eng_set = gbs.get_energy_set('eui') ai_set = gbs.get_cat_set(['A', 'I'], conn) study_set = eng_set.intersection(ai_set) df = df[df['Building_Number'].isin(study_set)] print df.groupby(['high_level_ECM', 'detail_level_ECM']).count() print len(df) df = df.groupby(['Building_Number']).filter(lambda x: len(x) == 1) print len(df) print df.head() print df.groupby(['high_level_ECM', 'detail_level_ECM']).count() return
def exclude(): df = pd.read_csv(os.getcwd() + '/input/FY/excluded_buildings.csv') df['exclude'] = 'Yes' conn = uo.connect('all') with conn: df1 = pd.read_sql( 'SELECT DISTINCT Building_Number FROM EUAS_monthly WHERE Fiscal_Year = \'2015\'', conn) df2 = pd.read_sql('SELECT Building_Number, Cat FROM EUAS_category', conn) df3 = pd.merge(df1, df2, on='Building_Number', how='left') df4 = pd.merge(df3, df, on='Building_Number', how='left') # df4 = df4[~df4['Cat'].isin(['A', 'I'])] print df4.head() conn.close() df4.to_csv(homedir + 'temp/exclude_2015.csv', index=False) print 'end'
def count_invest(): conn = uo.connect('all') with conn: df1 = pd.read_sql( 'SELECT DISTINCT Building_Number, high_level_ECM FROM EUAS_ecm WHERE high_level_ECM != \'GSALink\'', conn) df2 = pd.read_sql( 'SELECT DISTINCT Building_Number, ECM_program FROM EUAS_ecm_program', conn) eng_set = gbs.get_energy_set('eui') ai_set = gbs.get_cat_set(['A', 'I'], conn) study_set = eng_set.intersection(ai_set) df1.dropna(subset=['high_level_ECM'], inplace=True) df2.dropna(subset=['ECM_program'], inplace=True) df1 = df1[df1['Building_Number'].isin(study_set)] df2 = df2[df2['Building_Number'].isin(study_set)] print df1['high_level_ECM'].value_counts() print df2['ECM_program'].value_counts() return
def plot_scatter(hue_str, measure_type): conn = uo.connect('interval_ion_single') with conn: df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) sns.set_style("whitegrid") sns.set_palette("Set2", 8) sns.set_context("talk", font_scale=1) col_wrap_dict = {'hour': 6, 'month': 4, 'day': 5} upper = {'electric': 600, 'gas': 2500} ylabel = {'electric': 'electric (kwh)'.title(), 'gas': 'gas (Cubic Feet ?)'.title()} for b, s in bs_pair: print b, s # df_w = pd.read_sql('SELECT * FROM {0} WHERE Timestamp between \'\2015-01-01\' and \'2016-01-01\''.format(s), conn) with conn: df_w = pd.read_sql('SELECT * FROM {0}'.format(s), conn) df_e = pd.read_sql('SELECT Timestamp, {0} FROM {1}'.format(b, measure_type), conn) local = pd.to_datetime(df_w['Timestamp']).map(lambda x: x - np.timedelta64(5, 'h')) local_str = local.map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S')) df_w['Timestamp'] = local_str df_all = pd.merge(df_w, df_e, on='Timestamp', how='inner') df_all['hour'] = df_all['Timestamp'].map(lambda x: x[11:13]) df_all['month'] = df_all['Timestamp'].map(lambda x: x[5:7]) df_all['day'] = df_all['Timestamp'].map(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').strftime('%a')) if hue_str is None: sns.regplot(x='Temperature_F', y=b, data=df_all, fit_reg=False) plt.ylabel(ylabel[measure_type]) plt.title('{0} vs Temperature (F): {1}'.format(ylabel[measure_type], b)) plt.ylim((0, upper[measure_type])) elif hue_str == 'piece': df_all = df_all[df_all[b] > 0] plot_piecewise(measure_type, df_all, b, s) else: g = sns.lmplot(x='Temperature_F', y=b, hue=hue_str, col=hue_str, col_wrap=col_wrap_dict[hue_str], size=3, data=df_all, fit_reg=False) g.set(ylim=(0, upper[measure_type])) g.set(ylabel=ylabel[measure_type]) plt.gca().set_ylim(bottom=0) # plt.show() path = os.getcwd() + '/input/FY/interval/ion_0627/{0}_scatter/{1}_{2}_{3}'.format(measure_type, b, s, hue_str) P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight') plt.close() return
def write_robust_energy_set(): s = gbs.get_energy_set('eui') e = gbs.get_energy_set('eui_elec') g = gbs.get_energy_set('eui_gas') conn = uo.connect('all') def get_status(x): if x in s: return "Electric EUI >= 12 and Gas EUI >= 3for at least 6 years from FY2007 to FY2015" elif x in e: return "Electric EUI >= 12 for at least 6 years from FY2007 to FY2015, there exist at least 4 years from FY2007 to FY2015 where Gas EUI < 3" elif x in g: return "Gas EUI >= 3 for at least 6 years from FY2007 to FY2015, there exist at least 4 years from FY2007 to FY2015 where Electric EUI < 12" else: return "There exist at least 4 years from FY2007 to FY2015 where Gas EUI < 3 and there exist at least 4 years from FY2007 to FY2015 where Electric EUI < 12" with conn: df = pd.read_sql('SELECT Building_Number, Cat FROM EUAS_category', conn) # df['status'] = df['Building_Number'].map(lambda x: "Electric EUI >= 12 and Gas EUI >= 3for at least 6 years from FY2007 to FY2015" if x in s else 'Not robust energy') df['status'] = df['Building_Number'].map(get_status) df.to_csv(r_input + 'robust_energy_sep.csv', index=False)
def gsalink_facility_id(): conn = uo.connect('other_input') with conn: df1 = pd.read_sql( 'SELECT Building_ID AS Building_Number FROM GSAlink_Buildings_First_55_Opiton_26_Start_Stop_Dates', conn) df2 = pd.read_sql( 'SELECT Building_Number, Facility_Number FROM building_facility', conn) # print len(df2) print df1.head() # print '${0}$'.format(df1.ix[1, 0]) # print df2.head() # print '${0}$'.format(df2.ix[0, 0]) df = pd.merge(df1, df2, on='Building_Number', how='left') print df.head() df.dropna(subset=['Facility_Number'], inplace=True) print len(df.groupby('Facility_Number').filter(lambda x: len(x) > 1)) print len(df.groupby('Facility_Number').filter(lambda x: len(x) == 1))
def plot_trend_fromdb(b, s, breakpoints): conn = uo.connect('all') with conn: df_all = pd.read_sql('SELECT * FROM EUAS_monthly_weather WHERE Building_Number = \'{0}\''.format(b), conn) conn.close() df_all.sort(['year', 'month'], inplace=True) df_all = df_all[['year', 'month', 'eui_elec', 'eui_gas', 'hdd65', 'cdd65']] df_all = df_all[(df_all['year'] < 2016) & (df_all['year'] > 2002)] print type(df_all['eui_gas'].tolist()[0]) byyear = df_all.groupby(['year']).sum() byyear.index = byyear.index.map(lambda x: datetime.datetime(int(x), 1, 1)) sns.set_style("whitegrid") sns.set_context("talk", font_scale=1) bx = plt.axes() gas_line_color = '#DE4A50' elec_line_color = '#429CD5' hdd = byyear['hdd65'].tolist() hdd = [int(round(x, 0)) for x in hdd] cdd = byyear['cdd65'].tolist() cdd = [int(round(x, 0)) for x in cdd] bx = plt.axes() line1, = plt.plot(byyear.index, byyear['eui_gas'], ls='-', lw=2, marker='o', color=gas_line_color) line2, = plt.plot(byyear.index, byyear['eui_elec'], ls='-', lw=2, marker='o', color=elec_line_color) for m, n, d in zip(byyear.index, byyear['eui_gas'], hdd): bx.annotate('HDD\n{0}'.format(d), xy=(m, n)) for m, n, d in zip(byyear.index, byyear['eui_elec'], cdd): bx.annotate('CDD\n{0}'.format(d), xy=(m, n)) for bp in breakpoints: x = pd.to_datetime([pd.to_datetime(bp)] * 2) y = bx.get_ylim() plt.plot(x, y, ls='--', lw=2, color='gray') plt.legend([line1, line2], ['Gas', 'Electric'], loc='center left', bbox_to_anchor=(1, 0.5), prop={'size':10}) plt.title("Electric EUI and Gas EUI Trend") plt.ylabel("[kBtu/sq.ft/year]") plt.xlabel("Calendar Year") P.savefig(os.getcwd() + '/plot_FY_weather/html/single_building/trend/{0}_{1}_year.png'.format(b, s), dpi = 70) plt.close() return
def plot_outlier(measure_type): conn = uo.connect('interval_ion') with conn: df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) value_lb_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'} col = value_lb_dict[measure_type] print len(bs_pair) for b, s in bs_pair[:1]: print b, s with conn: df = pd.read_sql('SELECT * FROM {0} WHERE Building_Number = \'{1}\''.format(measure_type, b), conn) df = df[df[col] >= 0] points = df[col] outliers_mild, outliers_extreme = box_based_roll_outlier(points, nb_size=1000) maxi = max(points) df['outlier_mild'] = map(lambda x: maxi * 0.5 if x else np.nan, outliers_mild) df['outlier_extreme'] = map(lambda x: maxi * 0.75 if x else np.nan, outliers_extreme) df.to_csv(homedir + 'temp/{0}.csv'.format(b), index=False) return
def check_interval(filename): df = pd.read_csv(inputdir + filename) df.rename(columns=lambda x: x[:8] if x != 'Timestamp' else x, inplace=True) df.dropna(axis=1, how='all', inplace=True) df['Timestamp'] = pd.to_datetime(df['Timestamp']) df.set_index(pd.DatetimeIndex(df['Timestamp']), inplace=True) # df.info() df_re = df.resample('M', how='sum') cols = list(df_re) df_re.reset_index(inplace=True) df_long = pd.melt(df_re, id_vars='index', value_vars=cols) # print # print df_long.head() df_long.rename(columns={ 'index': 'Timestamp', 'variable': 'Building_Number', 'value': 'Electricity_(KWH)' }, inplace=True) df_long['month'] = df_long['Timestamp'].map(lambda x: x.month) df_long['year'] = df_long['Timestamp'].map(lambda x: x.year) col_str = ','.join(['\'{0}\''.format(x) for x in cols]) conn = uo.connect('all') with conn: df = pd.read_sql( 'SELECT Building_Number, year, month, [Electricity_(KWH)] FROM EUAS_monthly WHERE Building_Number IN ({0}) AND year = \'2015\'' .format(col_str), conn) # print df.head() df_long.drop('Timestamp', axis=1, inplace=True) df_all = pd.merge(df, df_long, how='left', on=['Building_Number', 'year', 'month'], suffixes=['_EUAS', '_ION']) df_all['ratio'] = df_all['Electricity_(KWH)_ION'] / df_all[ 'Electricity_(KWH)_EUAS'].map(lambda x: round(x, 3)) df_all['percent_diff'] = df_all['ratio'].map(lambda x: abs(1 - x) * 100.0) # print df_all.head() return df_all
def pca_var(): conn = uo.connect('weather_hourly_utc') with conn: df = pd.read_sql('SELECT * FROM downloaded', conn) nums95 = [] nums99 = [] ss = [] stations = df['ICAO'].tolist() for s in stations: with conn: df_temp = pd.read_sql('SELECT Temperature_F FROM {0}'.format(s), conn) n95, n99 = get_num_pc(df_temp['Temperature_F'], cuts=[0.95, 0.99]) nums95.append(n95) nums99.append(n99) ss.append(s) print s, n95, n99 df_out = pd.DataFrame({ 'ICAO': ss, 'num_pc_95_percent': nums95, 'num_pc_99_percent': nums99 }) df_out.to_csv(homedir + 'num_pc.csv')
def ion_gsalink_time(): df = pd.read_csv(os.getcwd() + \ '/input/FY/interval/ion_0627/summary_long/summary_electric.csv') df = df[['Building_Number', 'min_time']] conn = uo.connect('other_input') with conn: df_gsalink = pd.read_sql( 'SELECT Building_ID as Building_Number, Rollout_Date as GSALink_start_time FROM GSAlink_Buildings_First_55_Opiton_26_Start_Stop_Dates', conn) df_all = pd.merge(df, df_gsalink, on='Building_Number', how='left') df_all.rename(columns={'min_time': 'ION_start_time'}, inplace=True) df_all['GSALink_start_time'] = df_all['GSALink_start_time'].map( lambda x: np.nan if type(x) == float else datetime.strptime( x, '%Y/%m/%d').strftime('%Y-%m-%d')) df_all['ION_start_time'] = df_all['ION_start_time'].map( lambda x: np.nan if type(x) == float else datetime.strptime( x, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d')) df_all['days_diff'] = ( pd.to_datetime(df_all['GSALink_start_time']) - pd.to_datetime(df_all['ION_start_time'])) / np.timedelta64(1, 'D') df_all.to_csv(os.getcwd() + '/input/FY/interval/ion_0627/ion_gsalink_start.csv') print len(df_all[df_all['days_diff'] > 100]) return
def plot_action_fromdb(): conn = uo.connect('all') with conn: df_action = pd.read_sql('SELECT * FROM EUAS_ecm', conn) df_pro = pd.read_sql('SELECT * FROM EUAS_ecm_program', conn) df_bs = pd.read_sql('SELECT * FROM EUAS_monthly_weather', conn) bs_dict = dict(zip(df_bs['Building_Number'], df_bs['ICAO'])) df_action = df_action[['Building_Number', 'high_level_ECM', 'detail_level_ECM', 'Substantial_Completion_Date']] df_act = df_action.copy() df_act = df_act.dropna() df_act['action'] = df_act.apply(lambda r: r['high_level_ECM'] + ' -- ' + r['detail_level_ECM'], axis=1) df_act.drop(['high_level_ECM', 'detail_level_ECM'], axis=1, inplace=True) gr = df_act.groupby('Building_Number') names = list(gr.groups) # print names.index('WA0120BN') lines = ['Building_Number,Time,Action,Electric_Saving,Gas_Saving,Electric_Before,Electric_After,Gas_Before,Gas_After,Electric_CVRMSE,Gas_CVRMSE'] # FIXME: PA0060ZZ has None in eui_gas del names[153] # names = ['CA0306ZZ'] for i, name in enumerate(names): print i, name, '222222222222222222222222222222' group = gr.get_group(name) df_temp = group.groupby(['Substantial_Completion_Date'])['action'].apply(lambda x: '\n'.join(x)) df_show = df_temp.to_frame('ECM action') # df_show['Building Number'] = 'CT0013ZZ' df_show.reset_index(inplace=True) df_show['Substantial_Completion_Date'] = pd.to_datetime(df_show['Substantial_Completion_Date']) df_show.sort('Substantial_Completion_Date', inplace=True) days_diff = useq.dist_between_adjacent(df_show['Substantial_Completion_Date'].tolist()) pair = zip(df_show['Substantial_Completion_Date'].tolist(), df_show['ECM action'].tolist()) str_pair = [('{0}-{1}-{2}'.format(x[0].year, x[0].month, x[0].day), x[1]) for x in pair] breakpoints = [x[0] for x in str_pair] print breakpoints assert(len(breakpoints) > 0) ranges = ['before {0}'.format(breakpoints[0])] + useq.merge_adjacent(breakpoints, lambda x, y: '{0} -- {1}'.format(x, y)) + ['after {0}'.format(breakpoints[-1])] actionpoints = [x[1] for x in str_pair] actions = ['pre {0}'.format(actionpoints[0])] + useq.merge_adjacent(actionpoints, lambda x, y: 'post {0} pre {1}'.format(x, y)) + ['post {0}'.format(actionpoints[-1])] b = name # df_eng = gr_energy.get_group(b) # df_eng.reset_index(inplace=True) s = bs_dict[b] ar_pair = zip(actions, ranges) # un-comment if need to plot energy and dd trend side by side # plot_trend_fromdb(b, s, breakpoints) print 'plot trend per dd' plot_trend_per_dd_fromdb(b, s, breakpoints) results = [] for a, r in zip(actions, ranges): result = ltm.lean_temperature_fromdb(b, s, 2, r, action=a) if result == None: result = (None, None, None) d = {'building': b, 'station': s, 'timerange': r, 'action': a} result += tuple([d]) results.append(result) length = len(results) if length == 0: print 'no lean plot generated' continue def merge_action(string): string = string.replace('\n', ';') string = string.replace('pre ', '') return string def concat(string): return string.replace('\n', ';') for i in range(len(results) - 1): d_save, d_cvrmse = plot_saving_fromdb(b, s, results[i], results[i + 1]) lines.append(','.join(map(str, [b, breakpoints[i], # (merge_action(actions[i])), concat(actionpoints[i]), d_save['elec_percent'], d_save['gas_percent'], d_save['elec_before'], d_save['elec_after'], d_save['gas_before'], d_save['gas_after'], d_cvrmse['elec'],d_cvrmse['gas']]))) process_html(b, s, results, breakpoints) with open (os.getcwd() + '/plot_FY_weather/html/table/action_saving.csv', 'w+') as wt: wt.write('\n'.join(lines))
def piecewise_reg_one_fromdb(b, s, n_par, theme, cuttail, timerange=None, *args): sns.set_style("whitegrid") sns.set_palette("Set2") sns.set_context("paper", font_scale=1) if len(args) == 0: conn = uo.connect('all') with conn: df = pd.read_sql('SELECT * FROM EUAS_monthly_weather WHERE Building_Number = \'{0}\''.format(b), conn) conn.close() else: df = args[0] # df = df[df[s].notnull()] # df['year'] = df['timestamp'].map(lambda x: int(x[:4])) # df['month'] = df['timestamp'].map(lambda x: int(x[5:7])) df['timestamp'] = df.apply(lambda r: '{0}-{1}-1'.format(int(r['year']), int(r['month'])), axis=1) df['timestamp'] = pd.to_datetime(df['timestamp']) df.sort(['year', 'month'], inplace=True) if timerange == None: df = df.tail(n=36) else: yearcol, timefilter = util.get_time_filter(timerange) df['in_range'] = df[yearcol].map(timefilter) df = df[df['in_range']] if len(df) > 36 and cuttail: df = df.tail(n=36) if len(df) < 6: print 'not enough data points in {0}'.format(timerange) return None if 'day' in df: df = df[['timestamp', 'year', 'month', 'ave', theme, 'day']] else: df = df[['timestamp', 'year', 'month', 'ave', theme]] df = df[df['ave'].notnull()] x = np.array(df['ave']) y = np.array(df[theme]) # t = np.array(df['begin_time']) t = np.array(df['timestamp']) x_min = x.min() x_max = x.max() break_low = 40 break_high = 81 xd = np.linspace(x_min, x_max, 150) cvrmses = [] ps = [] slope_side = [] breakpoint_cal = [] if n_par == 2: breakpoints = range(break_low, break_high) for breakpoint in breakpoints: def piecewise_linear_leftslope(x, k, intercept): return np.piecewise(x, [x < breakpoint, x >= breakpoint], [lambda x:k * x + intercept, lambda x:k * breakpoint + intercept]) p1 , e1 = optimize.curve_fit(piecewise_linear_leftslope, x, y) cvrmse = CVRMSE(x, y, p1, piecewise_linear_leftslope, n_par) cvrmses.append(cvrmse) ps.append(p1) slope_side.append('left') breakpoint_cal.append(breakpoint) if theme == 'eui_elec': def piecewise_linear_rightslope(x, k, intercept): return np.piecewise(x, [x >= breakpoint, x < breakpoint], [lambda x:k * x + intercept, lambda x:k * breakpoint + intercept]) p2 , e2 = optimize.curve_fit(piecewise_linear_rightslope, x, y) cvrmse = CVRMSE(x, y, p2, piecewise_linear_rightslope, n_par) cvrmses.append(cvrmse) ps.append(p2) slope_side.append('right') breakpoint_cal.append(breakpoint) result = sorted(zip(breakpoint_cal, cvrmses, ps, slope_side), key=lambda x: x[1]) elif n_par == 3: breakpoints = [(i, j) for i in range(break_low, break_high) for j in range(i + 1, break_high)] for (break_1, break_2) in breakpoints: def piecewise_linear(x, k1, b1, k2): x0 = break_1 x1 = break_2 y0 = k1 * x0 + b1 y1 = y0 return np.piecewise(x, [x < x0, x >= x1], [lambda x:k1 * x + b1, lambda x:k2 * (x - x1) + y1, lambda x:y0]) p , e = optimize.curve_fit(piecewise_linear, x, y) cvrmse = CVRMSE(x, y, p, piecewise_linear, n_par) cvrmses.append(cvrmse) ps.append(p) slope_side.append('NA') result = sorted(zip(breakpoints, cvrmses, ps, slope_side), key=lambda x: x[1]) best = result[0] b_point_opt = best[0] p_opt = best[2] slope_side_opt = best[3] cvrmse_opt = best[1] # print theme, slope_side_opt, p_opt # print 'breakpoint: {1}, error: {0}'.format(cvrmse_opt, b_point_opt) if n_par == 2: if slope_side_opt == 'left': def piecewise_linear(x, k, intercept): return np.piecewise(x, [x < b_point_opt, x >= b_point_opt], [lambda x:k * x + intercept, lambda x:k * b_point_opt + intercept]) else: assert(slope_side_opt == 'right') def piecewise_linear(x, k, intercept): return np.piecewise(x, [x >= b_point_opt, x < b_point_opt], [lambda x:k * x + intercept, lambda x:k * b_point_opt + intercept]) elif n_par == 3: def piecewise_linear(x, k1, b1, k2): x0 = b_point_opt[0] x1 = b_point_opt[1] y0 = k1 * x0 + b1 y1 = y0 return np.piecewise(x, [x < x0, x >= x1], [lambda x:k1 * x + b1, lambda x:k2 * (x - x1) + y1, lambda x:y0]) if abs(p_opt[0] - 0) < 1e-20: print 'all zero {0} consumption: {1}'.format(theme, p_opt) good_regression = False return None # bx = plt.axes() # bx.plot(x, y, "o") # g = sns.lmplot(x=s, y=theme, hue='day', data=df, fit_reg=False) # # print piecewise_linear(xd, *p_opt) # plt.ylabel('kBtu') # # plt.gca().set_ylim(bottom=0) # ax = g.axes # ax[0, 0].set_ylim((0, 1e5)) # P.savefig('{3}scatter_{0}_{1}_{2}.png'.format(b, s, theme, image_output_dir), dpi = 150) # # plt.show() # plt.close() # g = sns.lmplot(x=s, y=theme, col='day', hue='day', col_wrap=3, size=3, data=df, fit_reg=False) # ax = g.axes # for i in ax: # i.set_ylim((0, 1e5)) # P.savefig('{3}scatter_{0}_{1}_{2}_3b3.png'.format(b, s, theme, image_output_dir), dpi = 150) # plt.close() plt.plot(x, y, "o") plt.plot(xd, piecewise_linear(xd, *p_opt), "-") plt.title('break point {0}F, CV(RMSE): {1}'.format(b_point_opt, cvrmse_opt)) # plt.show() # P.savefig('{3}regression_{0}_{1}_{2}.png'.format(b, s, theme, image_output_dir), dpi = 150) plt.close() good_regression = True # remove electric heating if theme == 'eui_elec': if slope_side_opt == 'left': print 'bad electric regression left side -----------------' good_regression = False if slope_side_opt == 'right' and p_opt[0] < 0: print 'bad electric regression right side ----------------' good_regression = False if theme == 'eui_gas': if slope_side_opt == 'left' and p_opt[0] > 0: print 'bad gas regression left side ----------------------' good_regression = False if slope_side_opt == 'right' > 0: print 'bad gas regression right side ---------------------' good_regression = False return {'breakpoint': b_point_opt, 'CV(RMSE)': best[1], 'regression_par': p_opt, 'x_range': (x_min, x_max), 'fun': piecewise_linear, 'x': x, 'y': y, 'good_regression': good_regression, 'df': df}
def temp(): conn = uo.connect('interval_ion') with conn: df = pd.read_sql('SELECT * FROM area', conn) df.to_csv(homedir + 'temp/area.csv')
def fit(measure_type, year=None): conn = uo.connect('interval_ion') with conn: df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) df_area = pd.read_sql('SELECT * FROM area', conn) df_area.set_index('Building_Number', inplace=True) bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) sns.set_style("whitegrid") sns.set_palette("Set2", 2) sns.set_context("talk", font_scale=1) # col_wrap_dict = {'hour': 6, 'month': 4, 'day': 5, 'status':2} # upper = {'electric': 600, 'gas': 2500} value_lb_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'} multiplier_dict = {'electric': 3.412, 'gas': 1.026} col = value_lb_dict[measure_type] m = multiplier_dict[measure_type] ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'} # test = ['TN0088ZZ', 'TX0057ZZ', 'NY0281ZZ', 'NY0304ZZ', 'MO0106ZZ'] # test = ['NM0050ZZ'] # bs_pair = [x for x in bs_pair if x[0] in test] lines = ['Building_Number,week night save%,weekend day save%,weekend night save%,aggregate save%,CVRMSE week day,CVRMSE week night,CVRMSE weekend day,CVRMSE weekend night'] print len(bs_pair) # bs_pair = bs_pair[:1] for b, s in bs_pair: print b, s try: area = df_area.ix[b, 'Gross_Sq.Ft'] except KeyError: print 'No area found' continue df = join_interval(b, s, area, col, m, measure_type, conn, year) if len(df) == 0: continue df.to_csv(homedir + 'temp/{0}.csv'.format(b)) df = df[df[col] >= 0] points = df[col] # outliers = show_outlier(points, b, 'upper', measure_type, 5) outliers = show_outlier(points, b, 'upper', measure_type, 1.5) df['outlier'] = outliers df = df[~np.array(outliers)] df['status_week_day_night'] = \ df.apply(lambda r: util.get_status(r['hour'], r['day']), axis=1) min_time = df['Timestamp'].min() max_time = df['Timestamp'].max() sns.set_style("whitegrid") colors = sns.color_palette('Paired', 16) colors_rgb = [util.float2hex(x) for x in colors] sns.set_context("talk", font_scale=1) gr = df.groupby('status_week_day_night') f, axarr = plt.subplots(2, 2, sharex=True, sharey=True) d0 = plot_piece(gr, axarr[0, 0], 'week day', colors_rgb[0], measure_type, b, s) if not d0 is None: axarr[0, 0].set_title('{0}\nbreak point {1}F, CV(RMSE): {2:.3f}'.format('week day', d0['breakpoint'], d0['CV(RMSE)'])) d1 = plot_piece(gr, axarr[0, 1], 'week night', colors_rgb[1], measure_type, b, s) d2 = plot_piece(gr, axarr[1, 0], 'weekend day', colors_rgb[2], measure_type, b, s) d3 = plot_piece(gr, axarr[1, 1], 'weekend night', colors_rgb[3], measure_type, b, s) save, err = compute_saving_all(d0, d1, d2, d3, axarr) plt.suptitle('{0} -- {1}'.format(min_time, max_time)) f.text(0.5, 0.04, 'Temperature_F', ha='center', va='center') if year is None: path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise/{1}/{0}_{1}.png'.format(b, measure_type) else: path = os.getcwd() + '/input/FY/interval/ion_0627/piecewise/{1}/{0}_{1}_{2}.png'.format(b, measure_type, int(year)) P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight') shutil.copy(path, path.replace('input/FY/interval/ion_0627/piecewise', 'plot_FY_weather/html/interval/lean')) plt.close() lines.append(','.join([b] + save + err)) if year is None: table_path = os.getcwd() + '/input/FY/interval/ion_0627/table/{0}_save.csv'.format(measure_type) else: table_path = os.getcwd() + '/input/FY/interval/ion_0627/table/{0}_save_{1}.csv'.format(measure_type, int(year)) with open(table_path, 'w+') as wt: wt.write('\n'.join(lines)) return
def read_interval_building(b): conn = uo.connect('interval_ion') with conn: df = pd.read_sql('SELECT * FROM electric WHERE Building_Number = \'{0}\''.format(b), conn) df.to_csv(homedir + 'temp/{0}_int.csv'.format(b))
def plot_scatter_long(hue_str, measure_type): conn = uo.connect('interval_ion') with conn: df_bs = pd.read_sql('SELECT * FROM {0}_id_station'.format(measure_type), conn) df_area = pd.read_sql('SELECT * FROM area', conn) df_area.set_index('Building_Number', inplace=True) bs_pair = zip(df_bs['Building_Number'], df_bs['ICAO']) sns.set_style("whitegrid") sns.set_palette("Set2", 2) sns.set_context("talk", font_scale=1.5) col_wrap_dict = {'hour': 6, 'month': 4, 'day': 5, 'status':2} upper = {'electric': 600, 'gas': 2500} value_lb_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'} multiplier_dict = {'electric': 3.412, 'gas': 1.026} ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'} # test = ['TX0057ZZ', 'NY0281ZZ', 'NY0304ZZ', 'MO0106ZZ'] # bs_pair = [x for x in bs_pair if x[0] in test] for b, s in bs_pair: print b, s # df_w = pd.read_sql('SELECT * FROM {0} WHERE Timestamp between \'\2015-01-01\' and \'2016-01-01\''.format(s), conn) col = value_lb_dict[measure_type] m = multiplier_dict[measure_type] with conn: df_w = pd.read_sql('SELECT * FROM {0}'.format(s), conn) df_minute = pd.read_sql('SELECT Timestamp, [{0}] FROM {1} WHERE Building_Number = \'{2}\''.format(col, measure_type, b), conn) df_minute['h'] = df_minute['Timestamp'].map(lambda x: x[:-5] + '00:00') df_e = df_minute.groupby('h').sum() df_e.reset_index(inplace=True) df_e.rename(columns={'h': 'Timestamp'}, inplace=True) try: area = df_area.ix[b, 'Gross_Sq.Ft'] except KeyError: print 'no area' continue df_e['eui'] = df_e[col] * m / area local = pd.to_datetime(df_w['Timestamp']).map(lambda x: x - np.timedelta64(5, 'h')) local_str = local.map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S')) df_w['Timestamp'] = local_str df_all = pd.merge(df_w, df_e, on='Timestamp', how='inner') df_all['hour'] = df_all['Timestamp'].map(lambda x: x[11:13]) df_all['month'] = df_all['Timestamp'].map(lambda x: x[5:7]) df_all['year'] = df_all['Timestamp'].map(lambda x: x[:4]) df_all['day'] = df_all['Timestamp'].map(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').strftime('%a')) df_all['status'] = df_all.apply(lambda r: 'other' if float(r['hour']) < 6 or float(r['hour']) > 18 or r['day'] in ['Sat', 'Sun'] else 'work hour', axis=1) df_all['status_week_day_night'] = \ df_all.apply(lambda r: get_status_v0(r['hour'], r['day']), axis=1) df_all['status_week_day_night_nonflex'] = \ df_all.apply(lambda r: get_status_v0(r['hour'], r['day']), axis=1) df_all.to_csv('/home/yujiex/Public/{0}.csv'.format(b), index=False) df_all['method'] = 'non flex week' df_all2 = df_all.copy() df_all2['status_week_day_night'] = \ df_all2.apply(lambda r: get_status(r['hour'], r['day']), axis=1) df_all2['method'] = 'flex week' if hue_str is None: sns.regplot(x='Temperature_F', y='eui', data=df_all, fit_reg=False) plt.ylabel(ylabel[measure_type]) plt.title('{0} vs Temperature (F): {1}'.format(ylabel[measure_type], b)) # plt.ylim((0, upper[measure_type])) elif hue_str is 'status': g = sns.lmplot(x='Temperature_F', y='eui', hue=hue_str, hue_order=['work hour', 'other'], palette='husl', size=5, data=df_all, fit_reg=False) plt.ylabel(ylabel[measure_type]) g.set(ylabel=ylabel[measure_type]) if measure_type == 'electric': plt.ylim((0, 0.003 * 4)) elif measure_type == 'gas': plt.ylim((0, 0.008 * 4)) min_time = df_all['Timestamp'].min() max_time = df_all['Timestamp'].max() print min_time, max_time plt.title('{0} {1} Setback'.format(b, measure_type.title())) plt.suptitle('{0} -- {1}'.format(min_time, max_time)) plt.subplots_adjust(top=0.85) elif hue_str == 'status_week_day_night_nonflex': df_plot = df_all.groupby('year').filter(lambda x: len(x) > 7500) if len(df_plot) == 0: print 'not enough data points' continue g = sns.lmplot(x='Temperature_F', y='eui', hue=hue_str, col='year', hue_order=['week day', 'weekend day', 'week night', 'weekend night'], palette='husl', # palette='Paired', size=5, aspect = 1.0, data=df_plot, fit_reg=False, scatter_kws={'s':8}, legend=False) # lowess=True, scatter_kws={'s':4}) plt.ylabel(ylabel[measure_type]) g.set(ylabel=ylabel[measure_type]) if measure_type == 'electric': plt.ylim((0, 0.003 * 4)) elif measure_type == 'gas': plt.ylim((0, 0.008 * 4)) # plt.ylim((0, 0.04)) min_time = df_all['Timestamp'].min() max_time = df_all['Timestamp'].max() plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), markerscale=3) print min_time, max_time # plt.title('{0} {1} Setback'.format(b, # measure_type.title())) # plt.suptitle('{0} -- {1}'.format(min_time, max_time)) title = '{0} {1} Setback\n{2} -- {3}'.format(b, measure_type.title(), min_time, max_time) plt.suptitle(title) plt.subplots_adjust(top=0.85) elif hue_str == 'status_week_day_night': # check distribution of the shooting up ones # df_all = df_all[(df_all['Temperature_F'] < 50) & (df_all['Temperature_F'] > 45)] # df_all = df_all[df_all['status_week_day_night'] == 'week day'] # print df_all.describe() # sns.distplot(df_all[df_all['eui'] < 40]['eui']) # print len(df_all[df_all['eui'] > 40]) df_con = pd.concat([df_all, df_all2], ignore_index=True) g = sns.lmplot(x='Temperature_F', y='eui', hue=hue_str, col='year', row='method', hue_order=['week day', 'weekend day', 'week night', 'weekend night'], palette='husl', # palette='Paired', size=5, aspect = 1.0, data=df_con, fit_reg=False) # lowess=True, scatter_kws={'s':4}) plt.ylabel(ylabel[measure_type]) g.set(ylabel=ylabel[measure_type]) if measure_type == 'electric': plt.ylim((0, 0.003 * 4)) elif measure_type == 'gas': plt.ylim((0, 0.008 * 4)) # plt.ylim((0, 0.04)) min_time = df_all['Timestamp'].min() max_time = df_all['Timestamp'].max() print min_time, max_time # plt.title('{0} {1} Setback'.format(b, # measure_type.title())) # plt.suptitle('{0} -- {1}'.format(min_time, max_time)) title = '{0} {1} Setback\n{2} -- {3}'.format(b, measure_type.title(), min_time, max_time) plt.suptitle(title) plt.subplots_adjust(top=0.85) elif hue_str == 'cluster': cl.plot_cluster(df_all, b, s, measure_type, 0.18, 100) continue elif hue_str == 'piece': df_all = df_all[df_all[b] > 0] plot_piecewise(measure_type, df_all, b, s) else: g = sns.lmplot(x='Temperature_F', y=b, hue=hue_str, col=hue_str, col_wrap=col_wrap_dict[hue_str], size=3, data=df_all, fit_reg=False) g.set(ylim=(0, upper[measure_type])) g.set(ylabel=ylabel[measure_type]) plt.gca().set_ylim(bottom=0) # plt.show() path = os.getcwd() + '/input/FY/interval/ion_0627/{0}_scatter/{1}_{2}_{3}'.format(measure_type, b, s, hue_str) P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight') plt.close() return
def fit_time(measure_type, region, season=None): conn = uo.connect('all') with conn: df_bs = pd.read_sql( 'SELECT Building_Number, ICAO, eui_elec, eui_gas, year, month, ave FROM EUAS_monthly_weather', conn) df_region = pd.read_sql( 'SELECT DISTINCT Building_Number, [Region_No.] FROM EUAS_monthly', conn) if measure_type == 'electric': good_set = gbs.get_energy_set('eui_elec') elif measure_type == 'gas': good_set = gbs.get_energy_set('gas') df_bs = pd.merge(df_bs, df_region, on='Building_Number', how='left') df_bs = df_bs[df_bs['Building_Number'].map(lambda x: x in good_set)] df_bs = df_bs[df_bs['Region_No.'] == str(region)] df_bs = df_bs[df_bs['eui_elec'].notnull()] df_bs = df_bs[df_bs['eui_gas'].notnull()] df_bs.sort_values(by=['Building_Number', 'year', 'month'], ascending=False, inplace=True) bs_pair = list(set(zip(df_bs['Building_Number'], df_bs['ICAO']))) df_bs['Timestamp'] = df_bs.apply( lambda r: '{}-{}'.format(int(r['year']), int(r['month'])), axis=1) sns.set_style("whitegrid") sns.set_context("talk", font_scale=1) ylabel = {'electric': 'electric (kBtu/sq.ft)', 'gas': 'gas kBtu/sq.ft'} col_dict = {'electric': 'eui_elec', 'gas': 'eui_gas'} print len(bs_pair) sns.set_style("whitegrid") # palette = sns.cubehelix_palette(len(bs_pair)) palette = sns.color_palette('husl', len(bs_pair)) sns.set_palette(palette) colors_rgb = [util.float2hex(x) for x in palette] sns.set_context("talk", font_scale=1) jsondir = os.getcwd( ) + '/plot_FY_weather/html/by_region/Region{}/piecewise_all/json/'.format( region) title = "Region {}".format(region) col = col_dict[measure_type] for i, (b, s) in enumerate(bs_pair): print b, s df = df_bs[df_bs['Building_Number'] == b] df = df.head(n=36) print df.head() points = df[col] min_time = df['Timestamp'].min() max_time = df['Timestamp'].max() bx = plt.axes() d0 = plot_piece(df, bx, title, colors_rgb[i], measure_type, b, s, scatter=False, annote=True, jsondir=jsondir, season=season) plt.xlabel('Temperature_F') # plt.show() if season is None: path = os.getcwd( ) + '/plot_FY_weather/html/by_region/Region{}/piecewise_all/{}.png'.format( region, measure_type) else: path = os.getcwd( ) + '/plot_FY_weather/html/by_region/Region{}/piecewise_all/{}_{}.png'.format( region, measure_type, season) P.savefig(path, dpi=my_dpi, figsize=(2000 / my_dpi, 500 / my_dpi), bbox_inches='tight') plt.close() return
import sqlite3 import pandas as pd import numpy as np import os import geocoder import time import util_io as uo homedir = os.getcwd() + '/csv_FY/' # project_dir = '/media/yujiex/work/project/data/' project_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + '/data/' conn = uo.connect('interval_ion') with conn: df1 = pd.read_sql('SELECT * FROM electric_id', conn) df2 = pd.read_sql('SELECT * FROM gas_id', conn) df1['has_electric'] = 1 df2['has_gas'] = 1 df = pd.merge(df1, df2, on='id', how='outer') df.rename(columns={'id': 'Building_Number'}, inplace=True) conn.close() conn = uo.connect('other_input') with conn: df3 = pd.read_sql('SELECT * FROM Entire_GSA_Building_Portfolio_input', conn) df = pd.merge(df, df3, on='Building_Number', how='left') df['in_facility'] = df.apply( lambda r: 1 if r['Building_Number'] == r['Facility_ID'] else np.nan, axis=1)
def compare_interval_db(b): conn = uo.connect('ION data.db') with conn: df = read_sql('SELECT * FROM ION_electricity WHERE Building_Number = \'{0}\''.format(b)) df.info()