def form_to_data(form):
    """Expand a submitted form into an hourly feature matrix.

    One row is produced for every hour (0-23) of every day yielded by
    ``get_between_dates(from_date, to_date)``.  Each row contains, in order:
    ``building_volume``, month, day of month, hour, the R-value returned by
    ``getRValue(building_type)``, ``set_temp``, ``rad_norm``, ``rad_hor``,
    ``out_temp`` and ``humidity``.  Every row shares the same form values;
    only month/day/hour vary.

    Args:
        form: mapping-like object whose entries expose a ``.data`` attribute
            (fields read: building_volume, from_date, to_date, building_type,
            set_temp, rad_norm, rad_hor, out_temp, humidity).

    Returns:
        ``np.array`` built from the list of rows (10 values per row).  When
        the date range yields no days the result is ``np.array([])``.

    NOTE(review): the R-value is placed *before* the set point here, whereas
    ``file_to_data`` places the set-point column before ``'RValue'``.  Confirm
    which column order the consuming model was trained with.
    """
    building_volume = form['building_volume'].data
    from_date = form["from_date"].data
    to_date = form['to_date'].data
    building_type = form["building_type"].data
    set_temp = form["set_temp"].data
    rad_norm = form["rad_norm"].data
    rad_hor = form["rad_hor"].data
    out_temp = form["out_temp"].data
    humidity = form['humidity'].data

    # The R-value depends only on the building type, so resolve it once
    # instead of once per generated row.
    r_value = getRValue(building_type)

    #use date.timetuple().tm_yday if combine both month and day to a single number
    rows = [
        [
            building_volume,
            day.month,
            day.day,
            hour,
            r_value,
            set_temp,
            rad_norm,
            rad_hor,
            out_temp,
            humidity,
        ]
        for day in get_between_dates(from_date, to_date)
        for hour in range(24)
    ]
    return np.array(rows)
def comparative_form_to_data(form):
    """Expand a comparison form into two hourly feature matrices.

    Two scenarios are built over the same date range.  They differ in
    building volume, building type (hence R-value), set temperature and
    humidity (the ``...2`` fields belong to the second scenario) and share
    ``rad_norm``, ``rad_hor`` and ``out_temp``.  For each scenario one row is
    produced per hour (0-23) of every day yielded by
    ``get_between_dates(from_date, to_date)``, with the columns: volume,
    month, day of month, hour, R-value, set temperature, ``rad_norm``,
    ``rad_hor``, ``out_temp``, humidity.

    Args:
        form: mapping-like object whose entries expose a ``.data`` attribute.

    Returns:
        Tuple ``(result1, result2)`` of ``np.array`` objects, one per
        scenario, with identical row counts.

    NOTE(review): the R-value is placed *before* the set point here, whereas
    ``file_to_data`` places the set-point column before ``'RValue'``.  Confirm
    which column order the consuming model was trained with.
    """
    building_volume = form['building_volume'].data
    building_volume2 = form['building_volume2'].data
    from_date = form["from_date"].data
    to_date = form['to_date'].data
    building_type = form["building_type"].data
    building_type2 = form['building_type2'].data
    set_temp = form["set_temp"].data
    set_temp2 = form['set_temp2'].data
    rad_norm = form["rad_norm"].data
    rad_hor = form["rad_hor"].data
    out_temp = form["out_temp"].data
    humidity = form['humidity'].data
    humidity2 = form['humidity2'].data

    # R-values depend only on the building types: look them up once rather
    # than once per generated row.
    r_value = getRValue(building_type)
    r_value2 = getRValue(building_type2)

    # Materialise the (month, day, hour) stamps a single time so the date
    # range is consumed once even if get_between_dates returns a one-shot
    # iterator, and both scenarios are guaranteed identical timestamps.
    stamps = [
        (day.month, day.day, hour)
        for day in get_between_dates(from_date, to_date)
        for hour in range(24)
    ]

    def scenario_rows(volume, scenario_r_value, set_point, scenario_humidity):
        # Build the feature rows of one scenario over the shared stamps.
        return np.array([
            [
                volume,
                month,
                day_of_month,
                hour,
                scenario_r_value,
                set_point,
                rad_norm,
                rad_hor,
                out_temp,
                scenario_humidity,
            ]
            for month, day_of_month, hour in stamps
        ])

    result1 = scenario_rows(building_volume, r_value, set_temp, humidity)
    result2 = scenario_rows(building_volume2, r_value2, set_temp2, humidity2)
    return result1, result2
def add_to_database(data_file):
    """Load a CSV file and bulk-insert its rows into ``dbo.building_data``.

    The file is read with ``pd.read_csv(data_file, header=0)``, handed to
    ``complete_data`` and then converted row by row into a 12-value parameter
    tuple.  CSV columns are addressed by position:

        0  -> buildingName
        1  -> buildingVolume (float)
        2  -> building type, converted with ``getRValue`` -> RValue
        3, 4, 5 -> month, day, and an hour value passed through ``get_hour``,
                   combined into a ``datetime`` -> inDatetime
        6..9  -> setPoint, irradiationNorm, irradiationHor, outsideTemp (float)
        10 -> location
        11..13 -> humidity, heating, cooling (float)

    All tuples are sent with ``cursor.executemany`` and committed through
    ``cnxn``; both names are expected to exist at module level.

    NOTE(review): the timestamp year is fixed to 1970, which is not a leap
    year, so a row with month 2 / day 29 makes ``datetime.datetime`` raise
    ``ValueError``.

    Args:
        data_file: path or file-like object accepted by ``pd.read_csv``.
    """
    # Same statement text as a single literal; adjacent string literals are
    # used only to keep the line length readable.  The twelve ``?`` markers
    # line up with the twelve listed columns.
    query = (
        ' INSERT INTO dbo.building_data ('
        '[buildingName] ,[buildingVolume] ,[inDatetime] ,[RValue] ,'
        '[setPoint] ,[irradiationNorm] ,[irradiationHor] ,[outsideTemp] ,'
        '[location] ,[humidity] ,[heating] ,[cooling])'
        ' VALUES (? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,?)'
    )

    frame = pd.read_csv(data_file, header=0)
    complete_data(frame)
    labels = frame.columns

    def cell(position, row):
        # Value of the column at `position`, looked up by row label `row`.
        return frame[labels[position]][row]

    def as_params(row):
        #it seems that cnxn can't take numpy.int64 type. So converting numbers to float
        return (
            cell(0, row),
            float(cell(1, row)),
            datetime.datetime(
                1970,
                int(cell(3, row)),
                int(cell(4, row)),
                get_hour(int(cell(5, row))),
                0,
                0,
            ),
            getRValue(cell(2, row)),
            float(cell(6, row)),
            float(cell(7, row)),
            float(cell(8, row)),
            float(cell(9, row)),
            cell(10, row),
            float(cell(11, row)),
            float(cell(12, row)),
            float(cell(13, row)),
        )

    vals = [as_params(row) for row in range(len(frame))]
    cursor.executemany(query, vals)
    cnxn.commit()
def file_to_data(df):
    """Turn an uploaded data frame into the feature matrix as a NumPy array.

    Steps:
      1. Add an ``'RValue'`` column to ``df`` (the caller's frame is modified
         in place) by passing the value of the column at position 2 of each
         row to ``getRValue``.
      2. Keep, in this order, the columns at positions 1, 3, 4, 5, 6, then
         ``'RValue'``, then the columns at positions 7, 8, 9 and 11.
      3. Let ``fill_empty_data`` fill in missing values, then replace whatever
         is still missing with 0.

    NOTE(review): ``form_to_data`` emits the R-value *before* the set point,
    while here the column at position 6 precedes ``'RValue'``.  Confirm which
    order the consuming model expects.

    Args:
        df: pandas DataFrame with at least 12 columns, addressed by position.

    Returns:
        ``.values`` of the selected, filled frame.
    """
    #get R value from the building type
    building_type_label = df.columns[2]
    df['RValue'] = df.apply(
        lambda row: getRValue(row[building_type_label]),
        axis=1,
    )

    # Resolve positional indices to labels only after 'RValue' was added,
    # exactly as the selection list was evaluated before.
    labels = df.columns
    leading = [labels[pos] for pos in (1, 3, 4, 5, 6)]
    trailing = [labels[pos] for pos in (7, 8, 9, 11)]
    features = df.filter(items=leading + ['RValue'] + trailing)

    #fill in the data that are missing
    fill_empty_data(features)

    #it's possible that the entire column is missing. Replace it with 0 in that case
    features.fillna(0, inplace=True)
    return features.values
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
# NOTE(review): `sklearn.externals.joblib` was removed in newer scikit-learn
# releases; confirm the pinned version or switch to the standalone `joblib`
# package.  `joblib` is not used in the visible part of this script.
from sklearn.externals import joblib
import pandas as pd
from data_utils.data_utils import getRValue

# Training script: fits a random-forest regressor on the example CSV and
# times how long scoring the held-out split takes.
if __name__ == '__main__':
    df = pd.read_csv('ML_Fullstack/example_input/fake_data_2.csv', header=0)
    # No random_state is passed, so the row order differs between runs.
    df = shuffle(df)
    # Derive the R-value from the column at position 2 and append it as a new
    # column (position 14 if the CSV has 14 columns -- TODO confirm).
    df['RValue'] = df.apply(lambda x: getRValue(x[df.columns[2]]), axis=1)
    # Positions of the feature columns and of the two target columns.
    # NOTE(review): 'RValue' (position 14) ends up as the LAST feature here,
    # whereas form_to_data / file_to_data place it in the middle of the row;
    # confirm the feature order matches what is used at prediction time.
    data_indices = [1, 3, 4, 5, 6, 7, 8, 9, 11, 14]
    label_indices = [12, 13]
    data_data = df.filter(items = [df.columns[idx] for idx in data_indices]).values
    data_label = df.filter(items = [df.columns[idx] for idx in label_indices]).values
    # Despite the variable name this is a regressor with two outputs
    # (one per label column).
    classifier = RandomForestRegressor(n_estimators=100)
    # 80/20 train/test split with a fixed seed.
    data_train, data_test, label_train, label_test = train_test_split(data_data, data_label, test_size=0.2, random_state=1)
    classifier.fit(data_train, label_train)
    import time
    # Time only the scoring call on the held-out data.
    start = time.time()
    score = classifier.score(data_test, label_test)
    end = time.time()