def run():
    """Print the average of ``rmse(GAIN(data), compare_data)`` over ``term`` rounds.

    ``j`` (the starting value of the accumulator) and ``term`` (the number
    of rounds) are read from the ``params`` module.  Each round calls
    ``get_data()`` again and scores ``GAIN`` applied to its first result
    against its second result.
    """
    from params import j, term

    running_total = j
    for _round in range(term):
        # The import stays inside the loop, exactly where it is first needed.
        from get_data import get_data

        incomplete, reference, _unused = get_data()
        running_total += rmse(GAIN(incomplete), reference)
    print(running_total / term)
def run():
    """Print the average hold-out RMSE of the LR ensemble over ``term`` rounds.

    Every round fetches a new ``(data, compare_data)`` pair, splits it with
    ``train_test_split(..., random_state=0)``, trains on the training part
    and scores ``lr_emsemble_predict`` on the test part.  The accumulator
    starts at ``params.j``.
    """
    # This first fetch is overwritten inside the loop; the call itself is kept.
    features, targets = get_data()
    from params import j, term

    running_total = j
    for _round in range(term):
        features, targets = get_data()
        split = train_test_split(features, targets, random_state=0)
        X_train, x_test, Y_train, y_test = split
        # NOTE(review): the trained object is not passed to the predict call;
        # presumably lr_emsemble_train keeps state elsewhere -- confirm.
        fitted = lr_emsemble_train(X_train, Y_train)
        running_total += rmse(lr_emsemble_predict(x_test), y_test)
    print(running_total / term)
def run():
    """Print the average RMSE of ``em(data)`` against ``compare_data``.

    ``params.j`` seeds the accumulator and ``params.term`` is the number of
    rounds.  In each round the output of ``em`` is wrapped in a DataFrame
    that reuses the index and columns of ``compare_data``, and every column
    is cast to the dtype of the matching ``compare_data`` column before
    scoring.
    """
    from params import j, term

    running_total = j
    for _round in range(term):
        from get_data import get_data

        incomplete, reference, _unused = get_data()
        estimate = em(incomplete)

        from pandas import DataFrame

        frame = DataFrame(
            estimate,
            index=reference.index,
            columns=reference.columns,
        )
        # Iterating a DataFrame yields its column labels.
        for column in reference:
            target_dtype = str(reference[column].dtype)
            frame[column] = frame[column].astype(target_dtype)

        running_total += rmse(frame, reference)
    print(running_total / term)
def run():
    """Run the train/``mida`` imputation ``term`` times and print the scores.

    Each round:
      1. fetches ``(data, compare_data, _)`` from ``get_data()``;
      2. trains a model on the zero-filled values together with the
         null mask (third argument ``30`` is passed through unchanged);
      3. calls ``mida`` on the zero-filled values;
      4. wraps the result in a DataFrame shaped like ``compare_data`` and
         casts each column to the matching ``compare_data`` dtype;
      5. records ``rmse(filled, compare_data)``.

    Prints the list of per-round scores, then their mean.
    """
    from pandas import DataFrame
    from params import term

    mark = []
    for _round in range(term):
        data, compare_data, _ = get_data()
        mask = data.isnull()

        # The zero-filled array is rebuilt for each call on purpose, so
        # ``mida`` never sees an array that ``train`` may have modified.
        model = train(data.fillna(0).values, mask, 30)
        filled_data = mida(model, data.fillna(0).values)

        filled_data = DataFrame(
            filled_data,
            index=compare_data.index,
            columns=compare_data.columns,
        )
        for col in compare_data:
            filled_data[col] = filled_data[col].astype(
                str(compare_data[col].dtype))

        # Score once per round (previously computed twice).
        mark.append(rmse(filled_data, compare_data))

    print(mark)
    print(np.sum(mark) / len(mark))
def best_test(op="RMS"):
    """Grid-search ``(len1, len2)`` for the lowest RMSE of ``test(...)``.

    ``len2`` runs over ``range(5, 10)`` and, for each ``len2``, ``len1``
    runs over ``range(len2 + 1, len(data.columns))``.  For every pair the
    result of ``test(len1, len2, data, op)`` has its columns cast to the
    dtypes of ``compare_data`` and is scored with ``rmse``.

    Reads the module-level names ``data`` and ``compare_data``.

    Returns:
        A tuple ``(best_len1, best_len2, best_loss)``.  If no pair beats
        the initial loss, the defaults ``(5, 10, 999999999999.)`` are
        returned.
    """
    # (len1, len2, loss) of the best candidate seen so far.
    best = (5, 10, 999999999999.)

    for len2 in range(5, 10):
        for len1 in range(len2 + 1, len(data.columns)):
            candidate = test(len1, len2, data, op)
            for column in compare_data.columns:
                wanted = str(compare_data[column].dtype)
                candidate[column] = candidate[column].astype(wanted)

            loss = rmse(candidate, compare_data)
            if not float(loss) < best[2]:
                continue
            # Store the loss as returned by rmse (not the float() copy).
            best = (len1, len2, loss)

    return best
def netflix_eval(probeFile, movieIDYear, custDecadeAvgRatings, movieIDAvgRating, custIDAvgRating, actualRatings):
    """
    Predict a rating for every customer line of the probe file and score it.

    probeFile            path of the probe file; lines containing ':' are
                         movie headers, all other lines are customer IDs
    movieIDYear          mapping movie ID -> year
    custDecadeAvgRatings mapping cust ID -> {decade: average rating}
    movieIDAvgRating     mapping movie ID -> average rating
    custIDAvgRating      mapping cust ID -> average rating
    actualRatings        actual customer ratings, passed to rmse

    Each prediction is 0.4 * movie average + 0.6 * customer average, where
    the customer average is the per-decade one when the customer has an
    entry for the movie's decade and the overall one otherwise.

    Returns [rmse, {movie ID: [predicted ratings]}].
    """
    ratings_by_movie = {}   # {movieID: [ratings]} for printing
    every_prediction = []

    with open(probeFile, 'r') as probe:
        probe_lines = list(probe)

    movieID = ""
    for raw in probe_lines:
        if ':' in raw:
            # Movie header: start a new list of predictions.
            current = []
            movieID = raw.strip(':\r\n')
        else:
            # A customer line must be preceded by a movie header.
            assert movieID
            custID = raw.strip()

            decade = netflix_decade_calc(movieIDYear[movieID])
            per_decade = custDecadeAvgRatings[custID]
            if decade in per_decade:
                cust_part = float(per_decade[decade])
            else:
                # Customer rated nothing in that decade: use overall average.
                cust_part = float(custIDAvgRating[custID])

            pred = (.4*float(movieIDAvgRating[movieID]) + .6*cust_part)
            assert type(pred) is float
            current.append(pred)
            every_prediction.append(pred)
        # Registered on every line, so a header also creates its entry.
        ratings_by_movie[movieID] = current

    answer_rmse = rmse(actualRatings, every_prediction)
    return [answer_rmse, ratings_by_movie]
def run():
    """Train on a random split of ``compare_data`` and score ``mida`` on the rest.

    Steps:
      1. shuffle the row positions and split them by ``test_size``;
      2. fit a ``MinMaxScaler`` on the training rows and transform both parts;
      3. call ``missing_method`` on the scaled test rows;
      4. train a model on the scaled training rows (as a double tensor);
      5. call ``mida`` on the scaled test rows, wrap the result in a
         DataFrame and cast its columns to the ``compare_data`` dtypes;
      6. print the result, the test rows and ``rmse(filled, test_data)``.

    Reads the module-level names ``test_size``, ``mechanism``, ``method``
    and ``miss_rato``.
    """
    from pandas import DataFrame
    from RMSE import rmse

    data, compare_data = get_data()
    data = compare_data  # work on the reference data only
    rows, cols = data.shape

    shuffled_index = np.random.permutation(rows)
    split_at = int(rows * (1 - test_size))
    train_index = shuffled_index[:split_at]
    test_index = shuffled_index[split_at:]
    train_data = data.values[train_index, :]
    test_data = data.values[test_index, :]

    # Scale both parts with statistics taken from the training rows only.
    scaler = MinMaxScaler()
    scaler.fit(train_data)
    train_data = scaler.transform(train_data)
    test_data = scaler.transform(test_data)

    data, mask = missing_method(test_data, mechanism, method, miss_rato)
    # NOTE(review): ``missed_data`` and ``mask`` are never used below and
    # ``mida`` is given ``test_data``; presumably ``missing_method`` alters
    # ``test_data`` in place or ``missed_data`` was meant here -- confirm.
    missed_data = torch.from_numpy(data).double()
    train_data = torch.from_numpy(train_data).double()

    model = train(train_data)
    filled_data = mida(model, test_data)

    # The filled rows are the test rows (assuming ``mida`` returns one row
    # per input row), so label them with the matching index entries.  Using
    # the full ``compare_data.index`` makes pandas reject the shape.
    filled_data = DataFrame(
        filled_data,
        index=compare_data.index[test_index],
        columns=compare_data.columns,
    )
    for col in compare_data:
        filled_data[col] = filled_data[col].astype(
            str(compare_data[col].dtype))

    err = rmse(filled_data, test_data)
    print(filled_data, test_data)
    print("err:", err)
def test_rmse4(self):
    """rmse of two 20-element lists of digit strings is about 2.47991935353."""
    v = rmse(['5', '3', '2', '4', '5', '1', '4', '1', '5', '1',
              '5', '1', '2', '4', '5', '1', '3', '4', '2', '4'],
             ['2', '1', '3', '1', '1', '2', '4', '3', '1', '2',
              '2', '3', '2', '1', '1', '3', '4', '1', '5', '3'])
    # Compare numerically: the text form of a float differs between Python
    # versions, and ``assert_`` is a deprecated alias.
    self.assertAlmostEqual(v, 2.47991935353, places=10)
def test_rmse3(self):
    """rmse of two 5-element lists of digit strings is about 2.40831891576."""
    v = rmse(['5', '3', '2', '4', '5'], ['2', '4', '3', '1', '2'])
    # Compare numerically: the text form of a float differs between Python
    # versions, and ``assert_`` is a deprecated alias.
    self.assertAlmostEqual(v, 2.40831891576, places=10)
def test_rmse2(self):
    """A constant difference of 4 between the lists gives an rmse of 4.0."""
    v = rmse(['1', '1', '1', '1', '1'], ['5', '5', '5', '5', '5'])
    # ``assertEqual`` replaces the deprecated ``assert_`` alias and reports
    # both values on failure.
    self.assertEqual(v, 4.0)
def test_rmse(self):
    """Identical lists give an rmse of 0.0."""
    v = rmse(['3', '3', '3'], ['3', '3', '3'])
    # ``assertEqual`` replaces the deprecated ``assert_`` alias and reports
    # both values on failure.
    self.assertEqual(v, 0.0)