Esempio n. 1
0
def run():
    """Print the mean RMSE of ``GAIN`` output against the reference data.

    Data is reloaded through ``get_data`` on each of ``term`` rounds; the
    per-round RMSE is added to the starting accumulator ``j`` taken from
    ``params`` and the total is divided by ``term`` before printing.
    """
    from params import j, term

    total = j
    for _ in range(term):
        from get_data import get_data

        incomplete, reference, _ignored = get_data()
        generated = GAIN(incomplete)
        total += rmse(generated, reference)
    print(total / term)
def run():
    """Print the mean test-split RMSE of the ensemble over ``term`` rounds.

    Each round reloads the data, splits it with ``random_state=0``, calls
    ``lr_emsemble_train`` on the training part and scores
    ``lr_emsemble_predict`` on the held-out part.
    """
    # This first load is immediately superseded inside the loop; it is kept
    # because get_data() is still invoked once up front in the original flow.
    features, targets = get_data()
    from params import j, term

    running_total = j
    for _ in range(term):
        features, targets = get_data()
        parts = train_test_split(features, targets, random_state=0)
        train_x, test_x, train_y, test_y = parts
        # The value returned by training is not handed to the predictor,
        # which only receives the test inputs.
        _model = lr_emsemble_train(train_x, train_y)
        predicted = lr_emsemble_predict(test_x)
        running_total += rmse(predicted, test_y)
    print(running_total / term)
Esempio n. 3
0
def run():
    """Print the mean RMSE of ``em`` output against the reference data.

    For each of ``term`` rounds the data is reloaded, passed through ``em``,
    wrapped in a DataFrame that shares the reference frame's index and
    columns, cast column-by-column to the reference dtypes, and scored.
    """
    from params import j, term

    accumulated = j
    for _ in range(term):
        from get_data import get_data

        raw, reference, _ignored = get_data()
        estimate = em(raw)

        from pandas import DataFrame

        estimate = DataFrame(estimate,
                             index=reference.index,
                             columns=reference.columns)
        # Align every column's dtype with the reference frame before scoring.
        for column in reference:
            target_dtype = str(reference[column].dtype)
            estimate[column] = estimate[column].astype(target_dtype)
        accumulated += rmse(estimate, reference)
    print(accumulated / term)
Esempio n. 4
0
def run():
    """Run ``term`` rounds of train + ``mida`` filling and print the scores.

    Each round reloads the data, trains a model on the zero-filled values
    together with the null mask, fills the data with ``mida``, casts the
    result to the reference dtypes and records its RMSE against the
    reference frame. The list of per-round scores is printed, followed by
    their mean.
    """
    from pandas import DataFrame
    from params import term

    mark = []
    for _ in range(term):
        data, compare_data, _ignored = get_data()
        mask = data.isnull()
        # 30 is passed through to train() unchanged; its meaning is defined
        # by train() (NOTE(review): presumably an epoch count - confirm).
        model = train(data.fillna(0).values, mask, 30)
        filled_data = mida(model, data.fillna(0).values)

        filled_data = DataFrame(filled_data,
                                index=compare_data.index,
                                columns=compare_data.columns)
        # Align every column's dtype with the reference frame before scoring.
        for col in compare_data:
            filled_data[col] = filled_data[col].astype(
                str(compare_data[col].dtype))

        # Score once per round; the previous version evaluated rmse twice
        # and fed one copy into an accumulator that was never read.
        mark.append(rmse(filled_data, compare_data))
    print(mark)
    print(np.sum(mark) / len(mark))
Esempio n. 5
0
def best_test(op="RMS"):
    """Search (len1, len2) pairs for the lowest RMSE produced by ``test``.

    ``len2`` runs over 5..9 and ``len1`` over ``len2 + 1`` up to (but not
    including) the number of columns of the module-level ``data``. Each
    candidate returned by ``test(len1, len2, data, op)`` is cast to the
    dtypes of the module-level ``compare_data`` and scored with ``rmse``.

    Returns a tuple ``(best_len1, best_len2, best_loss)``; when no candidate
    beats the initial sentinel the defaults ``(5, 10, 999999999999.)`` are
    returned.
    """
    winner_len1, winner_len2 = 5, 10
    lowest = 999999999999.
    for second in range(5, 10):
        for first in range(second + 1, len(data.columns)):
            candidate = test(first, second, data, op)
            # Align dtypes with the reference frame before scoring.
            for name in compare_data.columns:
                wanted = str(compare_data[name].dtype)
                candidate[name] = candidate[name].astype(wanted)
            loss = rmse(candidate, compare_data)
            if not float(loss) < lowest:
                continue
            winner_len1, winner_len2, lowest = first, second, loss
    return winner_len1, winner_len2, lowest
Esempio n. 6
0
def netflix_eval (probeFile, movieIDYear, custDecadeAvgRatings, movieIDAvgRating, custIDAvgRating, actualRatings) :
    """
    Applies heuristics to predict ratings and calculates the RMSE

    probeFile is the path to probe.txt from the command line; a line that
        contains ':' names a movie ID, every other non-blank line is a
        customer ID belonging to the most recent movie ID
    movieIDYear is the dictionary of {movie ID: year}; the year is passed to
        netflix_decade_calc
    custDecadeAvgRatings is the dictionary of
        {cust ID: {decade: average rating}}
    movieIDAvgRating is the dictionary of {movie ID: average rating}
    custIDAvgRating is the dictionary of {cust ID: average rating}
    actualRatings is the list of actual customer ratings for RMSE calculation

    Returns [rmse, {movie ID: [predicted ratings]}]. Movies that have no
    customer lines do not appear in the dictionary.
    """
    movieIDpredRatings = {} # {movieID:[ratings]} for printing
    allPredRatings = []
    movieID = ""
    predRatings = []
    with open(probeFile, 'r') as f_myfile:
        # Stream the file instead of loading every line up front.
        for line in f_myfile:
            if ':' in line: # movieID line starts a new group
                predRatings = []
                movieID = line.strip(':\r\n')
                continue

            custID = line.strip() # strip newline
            if not custID:
                # A blank line carries no customer; skipping it avoids a
                # lookup with an empty customer ID.
                continue
            assert movieID, "customer line found before any movie ID line"

            # look up year and determine the decade
            decade = netflix_decade_calc(movieIDYear[movieID])

            decadeRatings = custDecadeAvgRatings[custID]
            if decade in decadeRatings:
                custDecadeRating = float(decadeRatings[decade])
            else:
                # use individual customer average if customer didn't rate
                # any movies in that decade
                custDecadeRating = float(custIDAvgRating[custID])

            # Weighted blend of movie average and customer average
            # (original author's note for these weights: RMSE = 0.971).
            pred = .4*float(movieIDAvgRating[movieID]) + .6*custDecadeRating
            predRatings.append(pred)
            allPredRatings.append(pred)
            movieIDpredRatings[movieID] = predRatings

    answer_rmse = rmse(actualRatings, allPredRatings)

    return [answer_rmse, movieIDpredRatings]
Esempio n. 7
0
def run():
    """Train on a random split of the reference data and score ``mida``.

    The reference frame returned by ``get_data`` is shuffled and split by
    the module-level ``test_size``; both parts are scaled with a
    ``MinMaxScaler`` fitted on the training part. A model is trained on the
    training part, ``mida`` is applied to the test part, and the result is
    cast to the reference dtypes and scored with ``rmse`` against the scaled
    test data. The filled frame, the test data and the error are printed.
    """
    from RMSE import rmse
    from pandas import DataFrame

    data, compare_data = get_data()
    # Only the reference frame is used from here on.
    data = compare_data
    rows, _cols = data.shape
    shuffled_index = np.random.permutation(rows)
    split_at = int(rows * (1 - test_size))
    train_index = shuffled_index[:split_at]
    test_index = shuffled_index[split_at:]

    train_data = data.values[train_index, :]
    test_data = data.values[test_index, :]

    # Fit the scaler on the training rows only, then apply it to both parts.
    scaler = MinMaxScaler()
    scaler.fit(train_data)

    train_data = scaler.transform(train_data)
    test_data = scaler.transform(test_data)

    data, mask = missing_method(test_data, mechanism, method, miss_rato)
    # NOTE(review): missed_data is built but never used; mida() below
    # receives the complete test_data. Confirm whether the data with
    # missing values was meant to be filled instead.
    missed_data = torch.from_numpy(data).double()
    train_data = torch.from_numpy(train_data).double()

    model = train(train_data)
    filled_data = mida(model, test_data)

    # The filled rows correspond to the test split only, so label them with
    # the matching subset of the reference index; using the full index gives
    # a length mismatch between the data and the index.
    filled_data = DataFrame(filled_data,
                            index=compare_data.index[test_index],
                            columns=compare_data.columns)
    for col in compare_data:
        filled_data[col] = filled_data[col].astype(str(
            compare_data[col].dtype))

    err = rmse(filled_data, test_data)
    print(filled_data, test_data)
    print("err:", err)
0
 def test_rmse4 (self) :
     # rmse over two 20-element lists of numeric strings.
     v = rmse(['5', '3', '2', '4', '5', '1', '4', '1', '5', '1', '5', '1', '2', '4', '5', '1', '3', '4', '2', '4'],
              ['2', '1', '3', '1', '1', '2', '4', '3', '1', '2', '2', '3', '2', '1', '1', '3', '4', '1', '5', '3'])
     # Expected value is sqrt(123 / 20). Compare numerically: the text
     # produced by str() for a float differs between Python versions, and
     # the assert_ alias is no longer available in current unittest.
     self.assertAlmostEqual(v, 2.47991935353, places=10)
Esempio n. 9
0
 def test_rmse3 (self) :
     # rmse over two 5-element lists of numeric strings.
     v = rmse(['5', '3', '2', '4', '5'], ['2', '4', '3', '1', '2'])
     # Expected value is sqrt(29 / 5). Compare numerically: the text
     # produced by str() for a float differs between Python versions, and
     # the assert_ alias is no longer available in current unittest.
     self.assertAlmostEqual(v, 2.40831891576, places=10)
Esempio n. 10
0
 def test_rmse2 (self) :
     # Every pair differs by exactly 4, so the error is exactly 4.0.
     v = rmse(['1', '1', '1', '1', '1'], ['5', '5', '5', '5', '5'])
     # assertEqual replaces the deprecated assert_ alias and reports both
     # values on failure.
     self.assertEqual(v, 4.0)
Esempio n. 11
0
 def test_rmse (self) :
     # Identical inputs must give an error of exactly 0.0.
     v = rmse(['3', '3', '3'], ['3', '3', '3'])
     # assertEqual replaces the deprecated assert_ alias and reports both
     # values on failure.
     self.assertEqual(v, 0.0)