Ejemplo n.º 1
0
    def test_step(self, batch, batch_nb):
        """Evaluate the backward model on one test batch.

        The batch unpacks to ``(y, x, uids)`` (emissivity, laser parameters,
        uids).  ``self(y)`` predicts ``x`` and the RMSE against the true
        ``x`` is always logged.  When a forward model is attached, the
        predicted ``x`` is passed through it, the RMSE against ``y`` is
        logged, and the true/predicted tensors plus the integral-emissivity
        points are written to disk.

        Args:
            batch: Sequence unpacking to ``(y, x, uids)``.
            batch_nb: Batch index (unused).

        Returns:
            The ``y`` RMSE when a forward model is attached, otherwise the
            ``x`` RMSE.
        """
        # TODO: plot
        y, x, uids = batch

        x_pred = self(y)
        with torch.no_grad():
            x_loss = rmse(x_pred, x)
            self.log("backward/test/x/loss", x_loss, prog_bar=True)

        if self.forward_model is None:
            # Without a forward model there is no y_pred to score or dump.
            # This path used to crash on the unbound y_pred / loss names.
            return x_loss

        y_pred = self.forward_model(x_pred)
        y_loss = rmse(y_pred, y)
        self.log(
            "backward/test/y/loss",
            y_loss,
            prog_bar=True,
        )

        # Each call overwrites these files, so they hold the last batch only.
        torch.save(x, "/data-new/alok/laser/params_true_back.pt")
        torch.save(y, "/data-new/alok/laser/emiss_true_back.pt")
        torch.save(y_pred, "/data-new/alok/laser/emiss_pred.pt")
        torch.save(x_pred, "/data-new/alok/laser/param_pred.pt")

        nngraph.save_integral_emiss_point(
            y_pred,
            y,
            "/data-new/alok/laser/backwards_test_points.txt",
            all_points=True)
        return y_loss
Ejemplo n.º 2
0
    def training_step(self, batch, _batch_nb):
        """Run one training step of the backward model.

        The batch unpacks to ``(y, x, uids)`` (emissivity, laser parameters,
        uids).  The RMSE between ``self(y)`` and ``x`` is logged without
        gradient tracking.  When a forward model is attached, the loss is
        the RMSE between ``forward_model(self(y))`` and ``y``.

        NOTE(review): when ``self.forward_model`` is None, ``loss`` (and
        ``y_pred``) are never bound and this method raises - confirm a
        forward model is always attached during training.

        Returns:
            The ``y`` RMSE used as the training loss.
        """
        # TODO: plot
        y, x, uids = batch

        x_pred = self(y)
        with torch.no_grad():
            self.log("backward/train/x/loss", rmse(x_pred, x), prog_bar=True)

        if self.forward_model is not None:
            y_pred = self.forward_model(x_pred)
            loss = rmse(y_pred, y)
            self.log("backward/train/y/loss", loss, prog_bar=True)

        # Dump the train points once, five epochs before the configured end.
        if self.current_epoch == self.config["backward_num_epochs"] - 5:
            nngraph.save_integral_emiss_point(
                y_pred,
                y,
                "/data-new/alok/laser/backwards_train_points.txt",
                all_points=True,
            )

        self.log("backward/train/loss", loss, prog_bar=True)
        return loss
Ejemplo n.º 3
0
def evaluate(model, loader, task):
    """Score ``model`` over ``loader.steps`` batches drawn from ``loader``.

    Args:
        model: Callable taking ``(graph_2d, graph_3d)``; put in eval mode.
        loader: Object exposing ``steps`` and ``next_batch()``.
        task: ``'regression'`` selects RMSE; anything else selects ROC-AUC.

    Returns:
        RMSE for regression.  Otherwise ROC-AUC: a single score when the
        targets have one column, else the NaN-ignoring mean of the
        per-column scores, where negative targets are treated as missing.
    """
    model.eval()
    preds, targets = [], []
    for _ in range(loader.steps):
        graph_2d, graph_3d, labels = loader.next_batch()
        preds.extend(model(graph_2d, graph_3d).tolist())
        targets.extend(labels.tolist())

    y_hat = np.array(preds)
    y = np.array(targets)

    if task == 'regression':
        return rmse(y, y_hat)

    if y.shape[1] <= 1:
        return roc_auc_score(y, y_hat)

    per_label_auc = []
    for col in range(y.shape[1]):
        true, pred = y[:, col], y_hat[:, col]
        labelled = np.where(true >= 0)
        known = true[labelled]
        # Column with no labelled rows: contributes nothing.
        if len(known) == 0:
            continue
        # all 0's or all 1's: AUC is undefined, recorded as NaN.
        if len(set(known)) == 1:
            per_label_auc.append(float('nan'))
        else:
            per_label_auc.append(roc_auc_score(known, pred[labelled]))
    return np.nanmean(per_label_auc)
Ejemplo n.º 4
0
def main():
    """Fit, forecast and score every predictor for every SKU and period.

    For each (model, SKU, period) combination the series is loaded up to
    the period end, split into train/test, holidays are removed from the
    training part, and the predictor is fitted and asked for
    ``configuration.N_PREDS`` forecasts.  A plot, the forecast with
    residuals, and MAPE/RMSE results are saved.
    """
    results = init_results()
    for model_name, predictor in predictors.items():
        for sku in configuration.SKUS:
            for period_ind, period in enumerate(configuration.PERIODS):
                # Backslash-separated result path, exactly as before.
                res_path = configuration.FORECAST_RES_DIR + "\\".join(
                    [model_name, sku, str(period_ind)])
                end_of_period = period[1]

                real_series = loader.load_test_sku(
                    sku,
                    base_dir=configuration.BASE_DIR,
                    end_of_period=end_of_period)
                train, test = train_test_split(real_series,
                                               configuration.N_PREDS)
                train = utils.remove_holidays(train)

                predictor.fit(train, configuration.N_PREDS)
                forecast = predictor.predict(configuration.N_PREDS)
                resid = predictor.resid

                forecast_scaled = utils.scale_by_max(forecast)
                test_scaled = utils.scale_by_max(test)
                save_plot(test_scaled, forecast_scaled, end_of_period,
                          res_path)
                save_forecast_resid(forecast, resid, res_path)

                # MAPE uses the raw series; RMSE uses the max-scaled ones.
                mape_score = utils.mape(y_true=test, y_pred=forecast)
                rmse_score = utils.rmse(y_true=test_scaled,
                                        y_pred=forecast_scaled)
                save_result(results, model_name, sku, period_ind, mape_score,
                            rmse_score, predictor.describe())
Ejemplo n.º 5
0
def addnoise(name: str,
             zero_mean_gaussian_noise_sd: int = 5,
             percent_gaussian_impulse_noise: int = 5,
             impulse_noise_sd: int = 100):
    """Create a noisy copy of ``images/original/<name>`` and save it.

    ``percent_gaussian_impulse_noise`` percent of the pixels (chosen
    independently per pixel) are replaced by Gaussian impulse noise with
    mean 128 and standard deviation ``impulse_noise_sd``.  Every other
    pixel has zero-mean Gaussian noise with standard deviation
    ``zero_mean_gaussian_noise_sd`` added to it.  The result is clipped to
    [0, 255], written to ``images/noisy`` as an 8-bit PNG named
    ``<stem>_<sd>_<percent>_<impulse_sd>.png``, and the RMSE against the
    original image is printed.

    Args:
        name: File name of the source image inside ``images/original``.
        zero_mean_gaussian_noise_sd: Std. dev. of the additive noise.
        percent_gaussian_impulse_noise: Percentage of impulse-noise pixels.
        impulse_noise_sd: Std. dev. of the impulse noise.

    Raises:
        AssertionError: If the image is not 256x256.
    """
    imagepath = os.path.join('images', 'original', name)
    img = imageio.imread(imagepath).astype(float)
    inp_img = np.array(img, dtype=float, copy=True)
    assert img.shape == (256, 256)

    percent_noise = percent_gaussian_impulse_noise / 100
    rows, cols = img.shape
    # Per-pixel draws stay in the original order (one uniform, then one
    # gauss) so that a seeded `random` produces the same image as before.
    for i in range(rows):
        for j in range(cols):
            r = random.uniform(0, 1)
            if r < percent_noise:
                img[i, j] = random.gauss(128, impulse_noise_sd)
            else:
                img[i, j] += random.gauss(0, zero_mean_gaussian_noise_sd)

    # splitext instead of name[:-4]: the slice only worked for
    # three-character extensions such as ".png".
    stem = os.path.splitext(name)[0]
    oppath = os.path.join(
        'images', 'noisy',
        '{}_{}_{}_{}.png'.format(stem, zero_mean_gaussian_noise_sd,
                                 percent_gaussian_impulse_noise,
                                 impulse_noise_sd))
    img = np.clip(img, 0, 255)
    img = img.astype(np.uint8)
    imageio.imwrite(oppath, img)

    print('RMSE between generated noisy image and original image: {}'.format(
        rmse(inp_img, img)))
Ejemplo n.º 6
0
    def predict(self, filename = 'result.txt'):
        """Predict every test frame, print the results and log the RMSE.

        Each frame of ``self.test_x`` is passed to ``self.model.predict``
        with a new leading axis; the ``[0, -1]`` entry of every prediction
        is stored in ``self.prediction``.  When ``self.norm`` is truthy the
        true and predicted values are passed through
        ``self.scaler.inverse_transform`` before being compared.  The RMSE
        is printed and one tab-separated line (companies, timeseries type,
        RMSE) is appended to ``filename``.

        NOTE: Python 2 code (print statements).
        """

        y = []
        y_pred = []

        self.prediction = []
        for i in range(len(self.test_x)):
            curr_frame = self.test_x[i]
            # Add a leading axis so the model receives a batch of one frame.
            pred = self.model.predict(curr_frame[newaxis,:,:])
            self.prediction.append(pred[0,-1])

            if not self.norm:
              # Data was not normalised: compare raw values directly.
              y.append(self.test_y[i,-1])
              y_pred.append(pred[0,-1])
              print self.test_x[i,:,-1],y[-1], y_pred[-1]
            else:
              # Undo the scaling so values are compared in original units.
              test_x_inverse = self.scaler.inverse_transform(self.test_x[i,:,-1])
              y.append(self.scaler.inverse_transform([self.test_y[i,-1]])[0])
              y_pred.append(self.scaler.inverse_transform([pred[0,-1]])[0])
              print test_x_inverse,y[-1], y_pred[-1]

        r = rmse(y,y_pred)
        print 'RMSE:', r
        # Append mode: results from successive runs accumulate in the file.
        with open(filename, 'a') as fout:
          fout.write('%s\t%s\t%.4f\n'%(
            self.companies, self.timeseries_type,r ))
Ejemplo n.º 7
0
    def evaluate(self, test_data, scale):
        """Run the generator over a test set and score its outputs.

        Args:
            test_data: Path handed to ``generate_loader``.
            scale: Scale passed to the loader, the generator and the metrics.

        Returns:
            Tuple ``(rmse, lpips)`` from ``utils.rmse`` and ``utils.LPIPS``
            computed over the lists of reference and generated images.
        """
        test_loader = generate_loader(path=test_data,
                                      scale=scale,
                                      train=False,
                                      batch_size=1,
                                      num_workers=1,
                                      shuffle=False,
                                      drop_last=False)

        def to_image(tensor):
            # Clamp to [0, 1], drop the batch axis and permute axes (1, 2, 0)
            # before converting to NumPy (presumably channel-last - confirm).
            return tensor.cpu().clamp(0, 1).squeeze(0).permute(1, 2, 0).numpy()

        HRs, SRs = [], []
        for inputs in test_loader:
            HR = inputs[0].to(self.device)
            LR = inputs[1].to(self.device)
            with torch.no_grad():
                SR = self.G(LR, scale).detach()

            HRs.append(to_image(HR))
            SRs.append(to_image(SR))

        return utils.rmse(HRs, SRs, scale), utils.LPIPS(HRs, SRs, scale)
Ejemplo n.º 8
0
def main():
    """Complete the Netflix rating matrix with ASD or scaled ASD.

    The algorithm is taken from the first command-line argument
    (``"sASD"`` selects the scaled variant; anything else, or no argument,
    selects plain ASD).  Prints the data shape and density, then the RMSE
    of the completed matrix against the data under the mask.
    """
    matplotlib.rcParams["figure.dpi"] = 200
    matplotlib.rcParams["savefig.dpi"] = 600
    sns.set(style="darkgrid")

    algorithm = sys.argv[1] if len(sys.argv) > 1 else "ASD"

    data = utils.read_netflix_data()
    # Zero entries are treated as unobserved.
    mask = (data != 0)

    print("data shape:", data.shape)
    print("data density:", mask.sum() / data.size)

    rank = 50
    iter_max = 10000
    norm_tol = 1e-4

    minimize = (ASD.scaled_alternating_steepest_descent
                if algorithm == "sASD" else
                ASD.alternating_steepest_descent)

    completed_data = minimize(data, rank, mask, iter_max, norm_tol,
                              verbose=True).matrix

    print("RMSE:", utils.rmse(data, completed_data, mask))
Ejemplo n.º 9
0
def xgb_boost_model():
    """Train an XGBoost regressor on the basic features and report RMSE.

    Loads the pickled feature frame, treats rows with ``relevance == -1``
    as the submission set, splits the rest into train/test/hold-out via
    ``utils.split_dataset``, trains for 200 rounds and prints the hold-out
    RMSE.

    NOTE: Python 2 code (print statement).
    """
    # Use a context manager so the feature file is closed after loading
    # (the handle used to be leaked).  The open mode is left unchanged.
    with open("../output/features/basic_features.pkl", 'r') as feature_file:
        df_all = pickle.load(feature_file)
    test_ind = df_all.relevance == -1
    test_data = df_all[test_ind]
    train_data = df_all[~test_ind]
    test_data = test_data.drop(['relevance'], axis=1)
    # NOTE(review): the fitted encoder and `ids` are not used below.
    le = preprocessing.LabelEncoder()
    le.fit(train_data['relevance'])

    ids = test_data['id']

    train, test, hold_out = utils.split_dataset(train_data)

    relevant_columns =['title_similarity', 'product_desc_similarity',  'title_similarity_common', 'product_desc_similarity_common', 'description_length', 'search_length']
    dTrain = xgb.DMatrix(train['X'][relevant_columns], label=train['Y'])
    dTest = xgb.DMatrix(test['X'][relevant_columns], label=test['Y'])
    dHold_out = xgb.DMatrix(hold_out['X'][relevant_columns], label=hold_out['Y'])
    dSubmit = xgb.DMatrix(test_data[relevant_columns])

    param = {'bst:max_depth':5  , 'bst:eta':0.05, 'silent':1, 'objective':'reg:linear', 'eval_metric':'rmse'}

    evallist = [(dTest, 'eval'), (dTrain, 'train')]
    numRound = 200
    bst = xgb.train(param, dTrain, numRound, evallist)

    predHoldout = bst.predict(dHold_out)
    # The printed label says "Mean square" but the value is utils.rmse.
    print "Mean square hold out error ", utils.rmse(hold_out['Y'], predHoldout)

    predY = bst.predict(dSubmit)
    # NOTE(review): predY holds predictions for the submission rows while X/Y
    # are the hold-out split - predHoldout may have been intended; confirm.
    utils.debug_model(hold_out['X'], hold_out['Y'], predY)
Ejemplo n.º 10
0
def run_ridge_on_cat(cat):
    """Fit the per-category blend model for ``cat`` unless it is cached.

    Subsets the train/test frames to the given parent category, runs
    ``run_cv_model`` on them, stores the predictions in a ``cat_ridge``
    column and saves ``item_id`` + ``cat_ridge`` in the cache.

    Returns:
        Always True, whether the result was computed or already cached.
    """
    cache_key = 'cat_ridges_blend_l3_' + cat
    if is_in_cache(cache_key):
        print_step('Already have ' + cat + '...')
        return True

    print_step(cat + ' > Subsetting')
    # NOTE(review): train_/test_ are filtered with masks built from the
    # separate globals train/test - assumes they are row-aligned; confirm.
    train_c = train_[train['parent_category_name'] == cat].copy()
    test_c = test_[test['parent_category_name'] == cat].copy()
    print(train_c.shape)
    print(test_c.shape)

    # Pull out the target and ids, then drop them from the feature frames.
    target = train_c['deal_probability'].values
    train_id = train_c['item_id']
    test_id = test_c['item_id']
    train_c.drop(['deal_probability', 'item_id'], axis=1, inplace=True)
    test_c.drop('item_id', axis=1, inplace=True)

    print_step(cat + ' > Modeling')
    # NOTE(review): the model function passed is runLasso although the
    # labels say "ridge".
    results = run_cv_model(train_c, test_c, target, runLasso, params, rmse,
                           cat + '-ridge-blend')
    train_c['cat_ridge'] = results['train']
    test_c['cat_ridge'] = results['test']
    print_step(cat + ' > RMSE: ' + str(rmse(target, train_c['cat_ridge'])))

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print_step(cat + ' > Saving in Cache')
    train_c['item_id'] = train_id
    test_c['item_id'] = test_id
    output_columns = ['item_id', 'cat_ridge']
    save_in_cache(cache_key, train_c[output_columns], test_c[output_columns])
    return True
Ejemplo n.º 11
0
def eval(data, model_path):
    """Load the trained matrices and evaluate RMSE on the test data.

    The full prediction matrix is rebuilt as
    ``b + P_b[:, None] + Q_b + P.dot(Q.T)``.  Each prediction is also
    written to ``<model_path>/result.csv`` as one
    ``user_id,movie_id,pred,timestamp`` line.

    :param data: iterable of ``(user_id, movie_id, rating, timestamp)``
    :param model_path: directory holding the saved matrices and id maps;
        ``result.csv`` is written there
    :return: rmse between predicted and true ratings
    """

    pred_ratings, true_ratings = [], []
    Q, P, Q_b, P_b, b = utils.load_each_matrix(model_path)
    userId2idx, movieId2idx = utils.load_id2idx(model_path)
    # The original `Q_b[np.newaxis:, ]` was a no-op slice (np.newaxis is
    # None), so Q_b was already broadcast along the trailing axis; it is
    # written plainly here with identical results.
    complete_matrix = b + P_b[:, np.newaxis] + Q_b + P.dot(Q.T)

    with open(os.path.join(model_path, 'result.csv'), 'w',
              encoding='utf8') as f:
        for (user_id, movie_id, rating, timestamp) in data:
            true_ratings.append(rating)
            pred = complete_matrix[int(userId2idx[user_id]),
                                   int(movieId2idx[movie_id])]
            # Predictions outside [0, 8] fall back to the global bias b.
            if pred < 0 or pred > 8:
                pred = b
            pred_ratings.append(pred)
            # Terminate every record with a newline; previously all rows
            # were concatenated onto a single line.
            f.write(','.join(
                [str(user_id), str(movie_id), str(pred), str(timestamp)]) +
                '\n')

    return utils.rmse(pred_ratings, true_ratings)
 def test(self, data):
     """Return the RMSE of the stored predictions over ``data``.

     ``data`` yields ``(movie, user, rating)`` triples with 1-based ids.
     Triples whose movie or user index lies outside
     ``self.n_movies`` / ``self.n_users`` are skipped.
     """
     scored = [
         (rating, self.predictions[movie - 1, user - 1])
         for movie, user, rating in data
         if movie - 1 < self.n_movies and user - 1 < self.n_users
     ]
     real = [rating for rating, _ in scored]
     predicted = [estimate for _, estimate in scored]
     return rmse(real, predicted)
Ejemplo n.º 13
0
def calculateMetrics(time_range,model,ovitrap_eggs_i):
    """Return the RMSE between observed ovitrap eggs and the model output.

    For every ``t`` in ``time_range`` the change in the OVIPOSITION columns
    of ``model.Y`` between ``t - 7`` and ``t`` (nearest time indices) is
    reshaped to ``(m, n)``, summed over axis 0 and divided by
    ``BS_a * vBS_d``.  Only entries where ``ovitrap_eggs_i != [None]`` are
    compared.
    """
    parameters = model.parameters
    BS_a, vBS_d = parameters.BS_a, parameters.vBS_d
    m, n = parameters.m, parameters.n
    OVIPOSITION = parameters.OVIPOSITION
    Y = model.Y

    def index_of(t):
        # Index of the entry of time_range closest to t.
        return (np.abs(time_range - t)).argmin()

    weekly_oviposition = [
        (Y[index_of(t), OVIPOSITION] -
         Y[index_of(t - 7), OVIPOSITION]).reshape(m, n).sum(axis=0)
        for t in time_range
    ]
    lwO = np.array(weekly_oviposition) / (BS_a * vBS_d)
    # if multiple containers, assume the first one is the ovitrap and the
    # rest are wild containers
    lwO = lwO[:, 0]

    observed = ovitrap_eggs_i != [None]
    return utils.rmse(ovitrap_eggs_i[observed], lwO[observed])
Ejemplo n.º 14
0
 def test_step(self, batch, batch_nb):
     """Score the forward model on one test batch and dump its points.

     The batch unpacks to ``(x, y, uids)``.  The RMSE between ``self(x)``
     and ``y`` is logged and returned.

     NOTE(review): the points are written to ``forwards_val_points.txt``,
     the same file the validation step uses - confirm this is intended.
     """
     inputs, targets, uids = batch
     predictions = self(inputs)
     loss = rmse(predictions, targets)
     self.log("forward/test/loss", loss, prog_bar=True)
     nngraph.save_integral_emiss_point(
         predictions,
         targets,
         "/data-new/alok/laser/forwards_val_points.txt",
         all_points=True,
     )
     return loss
Ejemplo n.º 15
0
def alternating_steepest_descent(z0, rank, mask, max_iter, norm_tol, verbose=False):
    """Fit a rank-``rank`` factorization ``x @ y`` to the masked entries of z0.

    The factors are initialised from the truncated SVD of ``mask*z0`` and
    then updated alternately: a steepest-descent step on ``x`` followed by
    one on ``y``, each with step size
    ``norm(grad)**2 / norm(mask*(grad applied))**2``.  Iteration stops
    after ``max_iter`` iterations or once the relative residual
    ``norm(mask*(z0 - x@y)) / norm(mask*z0)`` drops below ``norm_tol``.

    Args:
        z0: Data matrix.
        rank: Rank of the factorization.
        mask: Array multiplied element-wise with ``z0`` to select entries.
        max_iter: Maximum number of iterations.
        norm_tol: Relative-residual stopping tolerance.
        verbose: Print the residual per iteration and a final summary.

    Returns:
        ``Result`` namedtuple with fields ``algorithm``, ``matrix``,
        ``time``, ``residual``, ``num_iterations`` and ``rmse``.
    """
    begin = time.time()

    # Initialize
    U, s, V = np.linalg.svd(mask*z0, full_matrices=False)
    s[rank:] = 0
    x = (U @ np.diag(s))[:,:rank]
    y = V[:rank,:]

    diff = mask*(z0 - x@y)
    norm_z0 = norm(mask*z0)

    # Bound before the loop so that max_iter == 0 returns the initial state
    # instead of failing on unbound names.
    residual = norm(diff)/norm_z0
    num_iterations = 0

    for num_iter in range(max_iter):
        # Step on x with y fixed.
        grad_x = -diff @ y.T

        delta_xy = mask*(grad_x@y)
        tx = norm(grad_x)**2/norm(delta_xy)**2
        x = x - tx*grad_x

        # Update the masked residual in place of recomputing mask*(z0 - x@y).
        diff = diff + tx*delta_xy

        # Step on y with the updated x fixed.
        grad_y = -x.T @ diff

        delta_xy = mask*(x@grad_y)
        ty = norm(grad_y)**2/norm(delta_xy)**2
        y = y - ty*grad_y

        diff = diff + ty*delta_xy
        residual = norm(diff)/norm_z0
        num_iterations = num_iter + 1

        if verbose:
            print(num_iter, residual)

        if residual < norm_tol:
            break

    xy = x@y
    asd_time = time.time() - begin

    rmse = utils.rmse(z0, xy, mask)

    Result = namedtuple("Result", ["algorithm", "matrix", "time", "residual", "num_iterations", "rmse"])
    # Reuse xy rather than computing x@y a second time.
    result = Result(algorithm="ASD", matrix=xy, time=asd_time, residual=residual, num_iterations=num_iterations, rmse=rmse)

    if verbose:
        print("Algoritmo: ASD")
        print("Tiempo:", asd_time)
        # Report the same count that is returned (was one less).
        print("Iteraciones:", num_iterations)

    return result
Ejemplo n.º 16
0
    def _calc_msve(self):
        """Append the current value-estimate error to ``self.msve``.

        A value estimate is computed for every state of the environment
        from its feature vector and ``self.theta``, then compared against
        ``self._true_values``.

        NOTE(review): the list is named ``msve`` but the metric appended is
        ``utils.rmse`` - confirm which one is intended.
        """
        estimates = [
            utils.state_value(self._features.vector(state), self.theta)
            for state in self._env.state_iterator()
        ]
        self.msve.append(utils.rmse(estimates, self._true_values))
Ejemplo n.º 17
0
def loss_function(batch_x, batch_y):
    """Return the denormalised RMSE plus the model's own losses.

    The model output for ``batch_x`` (run with ``training=True``) and
    ``batch_y`` are both passed through ``denorm`` with ``_min``/``_max``
    before the RMSE is taken; the sum of ``model.losses`` is added on top.
    """
    predictions = denorm(model(batch_x, training=True), _min, _max)
    targets = denorm(batch_y, _min, _max)
    model_penalty = tf.add_n(model.losses)
    return rmse(predictions, targets) + model_penalty
Ejemplo n.º 18
0
def runTFFM(train_X, train_y, test_X, test_y, test_X2, params):
    """Fit a TFFM regressor in rounds and predict on both test sets.

    The model is fitted ``rounds`` times for ``iters`` epochs each (both
    are names taken from the enclosing module); after each round the RMSE
    on ``test_X``/``test_y`` is reported.

    Returns:
        Tuple ``(pred_test_y, pred_test_y2)``: predictions for ``test_X``
        from the last round and predictions for ``test_X2``.
    """
    model = TFFMRegressor(**params)
    print_step('Fit TFFM')
    for round_no in range(1, rounds + 1):
        model.fit(train_X, train_y.values, n_epochs=iters)
        pred_test_y = model.predict(test_X)
        score = rmse(pred_test_y, test_y)
        print_step('Iteration {}/{} -- RMSE: {}'.format(
            round_no, rounds, score))
    print_step('TFFM Predict 2/2')
    return pred_test_y, model.predict(test_X2)
Ejemplo n.º 19
0
    def validation_step(self, batch, batch_nb):
        """Score the forward model on one validation batch.

        The batch unpacks to ``(x, y, uids)``.  The RMSE between
        ``self(x)`` and ``y`` is logged and returned; during the last
        epochs (``current_epoch > forward_num_epochs - 5``) the points are
        also written to disk.
        """
        inputs, targets, uids = batch
        predictions = self(inputs)
        loss = rmse(predictions, targets)
        # The drawn value is unused; the call is kept so the global NumPy
        # random stream advances exactly as before.
        np.random.uniform()
        self.log("forward/val/loss", loss, prog_bar=True)

        if self.current_epoch > self.config["forward_num_epochs"] - 5:
            nngraph.save_integral_emiss_point(
                predictions,
                targets,
                "/data-new/alok/laser/forwards_val_points.txt",
                all_points=True,
            )
        return loss
Ejemplo n.º 20
0
def runFM(train_X, train_y, test_X, test_y, test_X2, params):
    """Fit an FM_FTRL model in rounds and predict on both test sets.

    Args:
        train_X, train_y: Training features and targets.
        test_X, test_y: Features/targets used to report RMSE per round.
        test_X2: Second feature set to predict on after fitting.
        params: Keyword arguments for ``FM_FTRL`` plus a ``'rounds'`` entry
            giving the number of fit passes.  The dict is not modified.

    Returns:
        Tuple ``(pred_test_y, pred_test_y2)``: predictions for ``test_X``
        from the last round and predictions for ``test_X2``.

    Raises:
        KeyError: If ``params`` has no ``'rounds'`` entry.
    """
    # Work on a copy: popping 'rounds' from the caller's dict made a second
    # call with the same dict (e.g. one call per CV fold) raise KeyError.
    params = dict(params)
    params['D'] = train_X.shape[1]
    rounds = params.pop('rounds')
    model = FM_FTRL(**params)
    print_step('Fit FM')
    for i in range(rounds):
        # reset=False keeps the state learned in earlier rounds.
        model.fit(train_X, train_y, reset=False)
        pred_test_y = model.predict(test_X)
        print_step('Iteration {}/{} -- RMSE: {}'.format(i + 1, rounds, rmse(pred_test_y, test_y)))
    print_step('FM Predict 2/2')
    pred_test_y2 = model.predict(test_X2)
    return pred_test_y, pred_test_y2
Ejemplo n.º 21
0
    def training_step(self, batch, batch_nb):
        """Run one training step of the forward model.

        The batch unpacks to ``(x, y, uids)``.  The loss is the RMSE
        between ``self(x)`` and ``y``; it is logged and returned.  Five
        epochs before the configured end the train points are also written
        to disk.
        """
        inputs, targets, uids = batch
        predictions = self(inputs)
        loss = rmse(predictions, targets)

        if self.current_epoch == self.config["forward_num_epochs"] - 5:
            nngraph.save_integral_emiss_point(
                predictions,
                targets,
                "/data-new/alok/laser/forwards_train_points.txt",
                all_points=True,
            )

        self.log("forward/train/loss", loss, prog_bar=True)
        return loss
Ejemplo n.º 22
0
def evaluate(model, loader):
    """Evaluate ``model`` over ``loader`` and return four metrics.

    Each batch unpacks to
    ``(a2a_g, b2a_g, b2b_gl, feats, types, counts, y)``; ``feats`` is not
    passed to the model.  The second element of the model output is taken
    as the prediction.

    Returns:
        Tuple ``(rmse, mae, sd, pearson)`` over the flattened targets and
        predictions.
    """
    model.eval()
    preds, targets = [], []
    for a2a_g, b2a_g, b2b_gl, feats, types, counts, y in loader:
        _, y_hat = model(a2a_g, b2a_g, b2b_gl, types, counts)
        preds.extend(y_hat.tolist())
        targets.extend(y.tolist())

    y_hat = np.array(preds).reshape(-1)
    y = np.array(targets).reshape(-1)
    return rmse(y, y_hat), mae(y, y_hat), sd(y, y_hat), pearson(y, y_hat)
Ejemplo n.º 23
0
    def forecasting(self):
        """Run a sliding-window fit/forecast and print the mean errors.

        The first column of ``self.series_norm`` is the target, the
        remaining columns are features.  A window of ``self.window_size``
        rows is fitted and the next ``self.step_size`` rows are forecast;
        the window then advances by ``self.step_size``.  Fit and forecast
        RMSE are collected per window and their means are printed.

        NOTE: Python 2 code (print statements); uses the pandas ``.ix``
        indexer.
        NOTE(review): if ``self.model_type`` is neither 'ar' nor 'var',
        ``fmodel`` stays a list and ``fmodel.fit_forecast()`` raises.
        """
        print 'Data len %.0f' %(len(self.series_norm))
        print 'Forecasting...%s %s' %(self.model_type, self.feature_type)
        y = self.series_norm.ix[:, 0]
        x = self.series_norm.ix[:, 1:]
        errsfit = []
        errsfor = []
        start = 0
        end = self.window_size
        # Stop once fewer than step_size rows remain after the window.
        while (end <= len(self.series_norm) - self.step_size):
            ytrain = y[start:end]
            ytest = y[end:end + self.step_size]
            fmodel = []

            if self.model_type == 'ar':
                #print ytrain.shape, ytest.shape
                # ytrain = sm.add_constant(ytrain)
                fmodel = TSModel(endog=ytrain, method=self.model_type, steps=self.step_size, isnpa=False, verbose=False)

            elif  self.model_type == 'var':
                xtrain = x[start:end]
                # feature filtering on segmented a constant variation.
                # (keeps only columns with some value differing from row 0)
                xtrain = xtrain.loc[:, (xtrain != xtrain.ix[0]).any()]
                fmodel = TSModel(endog=ytrain, feature=xtrain, method=self.model_type, steps=self.step_size, isnpa=False,
                                 verbose=False)

            # try:
            fmodel.fit_forecast()
            # Windows for which the model produced no result are skipped.
            if fmodel.result is not None:
                efit = rmse(ytrain, fmodel.result['fit'])
                efor = rmse(ytest, fmodel.result['forecast'])
                # print '\t',model,start, end,  efit, efor , ytest,  fmodel.result['forecast'] #fmodel.result['fit'],
                errsfit.append(efit)
                errsfor.append(efor)
            start += self.step_size
            end += self.step_size

        print self.model_type, self.feature_type, self.companies, len(errsfit), len(errsfor), np.mean(errsfit), np.mean(errsfor)
Ejemplo n.º 24
0
def GetScores(actual, pred):
    """Get an RMS error for each model, ignoring NaN predictions.

    Args:
        actual: Observed values.
        pred: Either a 1-D array of predictions (one model) or an array
            with one row of predictions per model.

    Returns:
        A single score for 1-D ``pred``; otherwise an array with one score
        per row of ``pred``.
    """
    if len(pred.shape) == 1:
        # --- dont account for NaNs
        keep = ~np.isnan(pred)
        return utils.rmse(actual[keep], pred[keep])

    def row_score(r):
        # --- dont account for NaNs
        keep = ~np.isnan(pred[r])
        return utils.rmse(actual[keep], pred[r, keep])

    return np.array([row_score(r) for r in range(pred.shape[0])])
def main():
    """Fit and evaluate a regression of brain weight on head size.

    Reads ``headbrain.csv``, estimates the coefficients, runs a sample
    prediction, evaluates RMSE and R^2, and plots the regression line.
    """
    # read data from csv file
    data = pd.read_csv('headbrain.csv')
    print("data.shape = {}".format(data.shape))

    # load the predictor and response columns
    head_size = data['Head Size(cm^3)'].values
    brain_weight = data['Brain Weight(grams)'].values
    print(head_size.shape)

    coefficients = estimate_coefficients(head_size, brain_weight)

    # TEST: sample prediction for a head size of 3000
    predict(3000, coefficients)
    # END TEST

    # evaluate the model
    rmse(head_size, brain_weight, coefficients)
    r2_score(head_size, brain_weight, coefficients)
    plot_regression_line(
        head_size,
        brain_weight,
        coefficients,
        xlabel='Head Size in cm3',
        ylabel='Brain Weight in grams',
    )
Ejemplo n.º 26
0
def test():
    """Print the mean per-batch MAE and RMSE of ``net`` on the test loader.

    Each batch unpacks to ``(x, e, y)`` and the network is called as
    ``net(x, adj, e)`` without gradient tracking.

    NOTE(review): the ``{:2f}`` format means "minimum width 2", not two
    decimals - ``{:.2f}`` may have been intended.
    """
    print("------------test------------")
    mae_total = 0
    rmse_total = 0
    net.eval()
    with torch.no_grad():
        for i, (x, e, y) in enumerate(test_loader):
            output = net(x, adj, e)
            mae_total += utils.mae(output, y)
            rmse_total += utils.rmse(output, y)

        # i is the index of the last batch, so i + 1 is the batch count.
        batches = i + 1
        print("mae:{:2f} , rmse:{:2f}".format(mae_total / batches,
                                              rmse_total / batches))
Ejemplo n.º 27
0
def Error(truth, pred):
    """Return the pooled RMSE and the per-column RMSEs of ``pred``.

    Args:
        truth: Observed values, indexed as ``truth[row, col]``.
        pred: Predicted values with the same layout as ``truth``.

    Returns:
        Tuple ``(score, scores)``: ``score`` is the RMSE over all
        ``rows * cols`` cells rounded to 6 decimals, and ``scores`` is the
        list of ``utils.rmse`` values, one per column.
    """
    scores = [
        utils.rmse(y_obs=truth[:, i], y_hat=pred[:, i])
        for i in range(truth.shape[1])
    ]

    # Pooled error over the first two axes, vectorized instead of a
    # Python-level double loop.
    n_cells = truth.shape[0] * truth.shape[1]
    squared_error = (np.asarray(truth) - np.asarray(pred)) ** 2
    pooled = np.sqrt(squared_error.sum(axis=(0, 1)) / n_cells)

    # For plain 2-D inputs `pooled` is a scalar, and the previous
    # `score[0]` raised IndexError.  atleast_1d keeps the old behavior
    # (first entry) for inputs with extra trailing axes.
    score = np.round(np.atleast_1d(pooled)[0], 6)

    return score, scores
Ejemplo n.º 28
0
def distance(ovitrap_eggs_i,lwO):
    """Distance between observed ovitrap eggs and the model series ``lwO``.

    The metric is selected by ``sys.argv[2]``:

    * ``RMSE`` / ``D``: computed over the entries where
      ``ovitrap_eggs_i != [None]``.
    * ``FRECHET`` / ``DTW``: both series are turned into
      ``(time, value)`` curves.  The model curve covers the indices from
      the first valid observation up to (not including) the last one; the
      observed curve contains the valid observations only.

    For an unknown metric a message is printed and the interpreter exits.
    """
    metric = sys.argv[2]
    if(metric==RMSE):
        return utils.rmse(ovitrap_eggs_i[ovitrap_eggs_i!=[None]], lwO[ovitrap_eggs_i!=[None]])
    elif(metric==D):
        return utils.D(ovitrap_eggs_i[ovitrap_eggs_i!=[None]], lwO[ovitrap_eggs_i!=[None]])
    elif(metric in [FRECHET,DTW]):
        # Builtin float replaces the np.float alias, which was removed in
        # NumPy 1.24; the conversion still turns None into np.nan.
        ovitrap_eggs_i=np.array(ovitrap_eggs_i,dtype=float)
        valid_ovi_idx=~np.isnan(ovitrap_eggs_i)
        reversed_valid_ovi_idx=valid_ovi_idx[::-1]
        # Indices of the first and last valid observations.
        first,last=np.argmax(valid_ovi_idx), len(reversed_valid_ovi_idx)-np.argmax(reversed_valid_ovi_idx)-1
        # NOTE(review): range(first,last) leaves out index `last` itself -
        # confirm whether the final valid point should be included.
        x=np.array([[time_range[idx],lwO[idx]] for idx in range(first,last)])
        y=np.array([ [time_range[idx],ovitrap_eggs_i[idx] ] for idx,isValid in enumerate(valid_ovi_idx) if isValid])
        if(metric==FRECHET): return sm.frechet_dist(x,y)
        if(metric==DTW): return sm.dtw(x,y)[0]
    else:
        print('Metric %s not found'%metric)
        quit()
Ejemplo n.º 29
0
 def predict_step(self, batch, _batch_nb):
     """Predict laser parameters (and emissivity) for one batch.

     The batch is either ``(y,)`` or ``(y, x, uids)`` where ``y`` is the
     emissivity and ``x`` the laser parameters.

     Returns:
         Dict with ``params``, ``pred_emiss``, ``pred_loss`` and
         ``true_emiss``; ``true_params`` and ``uids`` are added when the
         batch carries them.  ``pred_emiss`` / ``pred_loss`` stay None
         when no forward model is attached.
     """
     out = {"params": None, "pred_emiss": None, "pred_loss": None}
     # If step data, there's no corresponding laser params
     try:
         (y, ) = batch  # y is emiss
     except ValueError:
         (y, x, uids) = batch  # y is emiss, x is laser_params
         out.update(true_params=x, uids=uids)
     out["true_emiss"] = y

     x_pred = self(y)
     out["params"] = x_pred
     if self.forward_model is None:
         return out

     y_pred = self.forward_model(x_pred)
     out["pred_emiss"] = y_pred
     out["pred_loss"] = rmse(y_pred, y)
     return out
    def train(self, data):
        """Train one SVR model per sensor (column of ``data``).

        Train the internal models to predict the behavior of each sensor.
        It is important to ensure that the training data is taken from
        models that are correct.

        For every sensor the data is split into ``X``/``Y`` with
        ``utils.split_xy``.  When ``self.cross_validate`` is truthy a grid
        search over ``self.cvparams`` is run; otherwise an SVR is built
        from ``self.defaults``.  The fitted model is stored in
        ``self.models[sensor]`` and its training RMSE is printed.

        NOTE: Python 2 code (print statements).
        """
        # Read in the data
        # NOTE(review): this local is never read; the branch below tests
        # self.cross_validate instead.
        cross_validate = False

        # Choose model parameter search space
        print "Training {0} sensors with {1} rows".format(
            data.shape[1], data.shape[0])

        for sensor in range(data.shape[1]):
            print "Training model for sensor {0}".format(sensor)
            X, Y = utils.split_xy(data, sensor)

            if self.cross_validate:
                # Grid-search the SVR hyper-parameters.
                svr = SVR()
                clf = GridSearchCV(svr, self.cvparams, verbose=3)
                model = clf.fit(X, Y)
                print "Best model params for sensor {0}:".format(sensor)
                print model.best_params_
            else:
                # Fixed hyper-parameters taken from the configured defaults.
                C = self.defaults["C"]
                kernel = self.defaults["kernel"]
                gamma = self.defaults["gamma"]
                clf = SVR(C=C, kernel=kernel, gamma=gamma)
                model = clf.fit(X, Y)

            # Check the training RMSE to ensure we are on track
            print "Testing <sensor={0}> model with {1} training rows".format(
                sensor, data.shape[0])
            Yhat = model.predict(X)
            rmse = utils.rmse(Yhat, Y)
            self.models[sensor] = model
            print "RMSE for <sensor={0}> on training data is {1}".format(
                sensor, rmse)
Ejemplo n.º 31
0
def _exp(num_episodes, estimators, data_name, verbose=0):
    """Run ``num_episodes`` experiments on one dataset; report bias and RMSE.

    Args:
        num_episodes: Number of repeated runs.
        estimators: Passed through to ``single_run``.
        data_name: Dataset name, passed to ``single_run`` and printed.
        verbose: When truthy, print the per-estimator RMSE and bias.

    Returns:
        Tuple ``(dict_bias, dict_rmse)`` keyed by estimator.
    """
    # Loop for a number of experiments on the single dataset
    episode_rmses, episode_estimates = [], []
    for _ in range(num_episodes):
        print("=== {} ===".format(data_name))
        reward_est, reward_true = single_run(estimators=estimators,
                                             data_name=data_name)
        episode_rmses.append({
            name: rmse(a=np.mean(estimate), b=reward_true)
            for name, estimate in reward_est.items()
        })
        episode_estimates.append(reward_est)

    # Compute overall Bias and RMSE:
    # aggregate all the results over all the epochs
    summed_estimates = aggregator(buffer=episode_estimates)
    summed_rmses = aggregator(buffer=episode_rmses)

    # run one more experiment to compute the bias
    reference_est, _ = single_run(estimators=estimators, data_name=data_name)
    # The two dicts are paired positionally, so this relies on both
    # iterating their estimators in the same order.
    dict_bias = {}
    for (name, total), (_, reference) in zip(summed_estimates.items(),
                                             reference_est.items()):
        dict_bias[name] = np.mean((total / num_episodes) - reference)
    dict_rmse = {
        name: total / num_episodes
        for name, total in summed_rmses.items()
    }

    if verbose:
        for (name, value_bias), (_, value_rmse) in zip(dict_bias.items(),
                                                       dict_rmse.items()):
            print("[{}: {}] RMSE over {}-run: {}".format(
                data_name, name, num_episodes, value_rmse))
            print("[{}: {}] Bias over {}-run: {}".format(
                data_name, name, num_episodes, value_bias))

    return dict_bias, dict_rmse
Ejemplo n.º 32
0
     for row in test_df.itertuples():
         user, item, actual = row[1]-1, row[2]-1, row[3]
         predictions_baseline.append(pre.predict_baseline(user, item))
         predictions_itemCF.append(pre.predict_itemCF(user, item))
         predictions_userCF.append(pre.predict_userCF(user, item))
         predictions_itemCF_baseline.append(pre.predict_itemCF_baseline(user, item))
         predictions_userCF_baseline.append(pre.predict_userCF_baseline(user, item))
         predictions_itemCF_bias.append(pre.predict_itemCF_bias(user, item))
         predictions_topkCF_item.append(pre.predict_topkCF_item(user, item, 20))
         predictions_topkCF_user.append(pre.predict_topkCF_user(user, item, 30))
         predictions_normCF_item.append(pre.predict_normCF_item(user, item, 20))
         predictions_normCF_user.append(pre.predict_normCF_user(user, item, 30))
         predictions_blend.append(pre.predict_blend(user, item, 20, 30, 0.7))
         targets.append(actual)
 
     rmse_baseline.append(utils.rmse(np.array(predictions_baseline), np.array(targets)))
     rmse_itemCF.append(utils.rmse(np.array(predictions_itemCF), np.array(targets)))
     rmse_userCF.append(utils.rmse(np.array(predictions_userCF), np.array(targets)))
     rmse_itemCF_baseline.append(utils.rmse(np.array(predictions_itemCF_baseline), np.array(targets)))
     rmse_userCF_baseline.append(utils.rmse(np.array(predictions_userCF_baseline), np.array(targets)))
     rmse_itemCF_bias.append(utils.rmse(np.array(predictions_itemCF_bias), np.array(targets)))
     rmse_topkCF_item.append(utils.rmse(np.array(predictions_topkCF_item), np.array(targets)))
     rmse_topkCF_user.append(utils.rmse(np.array(predictions_topkCF_user), np.array(targets)))
     rmse_normCF_item.append(utils.rmse(np.array(predictions_normCF_item), np.array(targets)))
     rmse_normCF_user.append(utils.rmse(np.array(predictions_normCF_user), np.array(targets)))
     rmse_blend.append(utils.rmse(np.array(predictions_blend), np.array(targets)))
     print('测试完成')
 print('------ 测试结果 ------')
 print('各方法在交叉验证下的RMSE值:')
 print('baseline:           %.4f' % np.mean(rmse_baseline))
 print('itemCF:             %.4f' % np.mean(rmse_itemCF))
def kfold_lightgbm(train_df, test_df, num_folds):
    """Run grouped k-fold LightGBM training and write the predictions to disk.

    One booster is trained per ``GroupKFold`` split, with rows grouped by
    their ``month``/``year`` combination. Each fold's booster is saved, fills
    in the out-of-fold predictions for its validation rows, and contributes an
    equal share (``1 / n_splits``) to the averaged test prediction.

    Args:
        train_df: Training frame. Must provide ``month``, ``year`` and
            ``demand`` columns; every column not in ``FEATS_EXCLUDED`` is
            used as a feature.
        test_df: Frame to predict on; must provide the same feature columns.
        num_folds: Number of ``GroupKFold`` splits.

    Returns:
        None. Results are delivered through side effects:

        * each fold's model is saved under ``../output/``;
        * accumulated gain importances go to ``display_importances``;
        * the ``demand`` column of the caller's ``train_df`` and ``test_df``
          is overwritten with the predictions;
        * ``id``/``d``/``demand`` are written to ``oof_file_name`` and
          ``submission_file_name`` (``id`` and ``d`` are assumed to be
          available after ``reset_index()`` -- confirm against the caller);
        * the overall RMSE and a completion message go to ``line_notify``.
    """
    print('Starting LightGBM. Train shape: {}'.format(train_df.shape))

    splitter = GroupKFold(n_splits=num_folds)

    # Result holders: out-of-fold predictions, averaged test predictions and
    # the per-fold feature importances accumulated so far.
    oof_preds = np.zeros(train_df.shape[0])
    sub_preds = np.zeros(test_df.shape[0])
    importance_log = pd.DataFrame()

    feature_cols = [col for col in train_df.columns
                    if col not in FEATS_EXCLUDED]
    # Rows sharing a month/year pair always land in the same fold.
    month_year = (train_df['month'].astype(str) + '_'
                  + train_df['year'].astype(str))

    fold_iter = splitter.split(train_df[feature_cols], groups=month_year)
    for n_fold, (train_idx, valid_idx) in enumerate(fold_iter):
        fold_no = n_fold + 1
        # Every fold gets its own seed (1, 2, 4, ...), shared by all three
        # seed parameters.
        fold_seed = int(2**n_fold)

        train_x = train_df[feature_cols].iloc[train_idx]
        train_y = train_df['demand'].iloc[train_idx]
        valid_x = train_df[feature_cols].iloc[valid_idx]
        valid_y = train_df['demand'].iloc[valid_idx]

        dtrain = lgb.Dataset(train_x, label=train_y, free_raw_data=False)
        dvalid = lgb.Dataset(valid_x, label=valid_y, free_raw_data=False)

        # GPU options ('device': 'gpu', 'gpu_use_dp': True) are not enabled.
        params = {
            'boosting': 'gbdt',
            'metric': ['rmse'],
            'objective': 'tweedie',
            'learning_rate': 0.05,
            'tweedie_variance_power': 1.1,
            'subsample': 0.5,
            'subsample_freq': 1,
            'num_leaves': 2**8 - 1,
            'min_data_in_leaf': 2**8 - 1,
            'feature_fraction': 0.8,
            'verbose': -1,
            'seed': fold_seed,
            'bagging_seed': fold_seed,
            'drop_seed': fold_seed,
            'num_threads': -1
        }

        booster = lgb.train(
            params,
            dtrain,
            num_boost_round=10000,
            valid_sets=[dtrain, dvalid],
            valid_names=['train', 'test'],
            early_stopping_rounds=200,
            verbose_eval=100,
        )

        booster.save_model(f'../output/lgbm_group_k_fold_21days_{n_fold}.txt')

        # Predict with the best iteration found on this fold.
        best_iter = booster.best_iteration
        oof_preds[valid_idx] = booster.predict(valid_x,
                                               num_iteration=best_iter)
        test_share = booster.predict(test_df[feature_cols],
                                     num_iteration=best_iter)
        sub_preds += test_share / splitter.n_splits

        # Log-scaled gain importances for this fold, appended to the log.
        gain = booster.feature_importance(importance_type='gain',
                                          iteration=best_iter)
        fold_importance = pd.DataFrame(
            {
                'feature': feature_cols,
                'importance': np.log1p(gain),
                'fold': fold_no,
            },
            columns=['feature', 'importance', 'fold'],
        )
        importance_log = pd.concat([importance_log, fold_importance], axis=0)

        fold_rmse = rmse(valid_y, oof_preds[valid_idx])
        print('Fold %2d RMSE : %.6f' % (fold_no, fold_rmse))

        # Release the fold's working data before the next split.
        del booster, train_x, train_y, valid_x, valid_y
        gc.collect()

    display_importances(
        importance_log,
        '../imp/lgbm_importances_group_k_fold_21days.png',
        '../imp/feature_importance_lgbm_group_k_fold_21days.csv')

    # Overall score is computed BEFORE the target column is overwritten below.
    full_rmse = rmse(train_df['demand'], oof_preds)
    line_notify('Full RMSE score %.6f' % full_rmse)

    output_cols = ['id', 'd', 'demand']

    # Out-of-fold predictions replace the target in the caller's frame.
    train_df.loc[:, 'demand'] = oof_preds
    oof_frame = train_df.reset_index()
    oof_frame[output_cols].to_csv(oof_file_name, index=False)

    # Averaged test predictions. The second reset_index() adds an 'index'
    # column to the written file; that layout is kept unchanged.
    test_df.loc[:, 'demand'] = sub_preds
    test_frame = test_df.reset_index()
    submission = test_frame[output_cols].reset_index()
    submission.to_csv(submission_file_name, index=False)

    line_notify('{} done.'.format(sys.argv[0]))
Ejemplo n.º 34
0
# ... standard deviation). Randomly select 75% of the data for training.
# Return the structure of the constructed tree.
nclasses = np.unique(y).size
n = len(y)
randind = np.arange(0, n)
np.random.shuffle(randind)
# Slice bounds must be integers: a float bound such as `0.75 * n` raises
# TypeError on current NumPy, so the split point is truncated explicitly.
n_train = int(0.75 * n)
ind_train = randind[0:n_train]
ind_test = randind[n_train:n]

tree = RegressionTree(nclasses)
tree.train(x[ind_train, :], y[ind_train], SDRMIN=0.1, NMIN=3)

g, pos = tree.gerar_grafo()
utils.draw_graph(g, pos)

# b) Use the remaining 25% of the data for evaluation. Report the MAPE and
# RMSE measures.

yhat = tree.estimate(x[ind_test, :])

rmse = utils.rmse(y[ind_test], yhat)
mape = utils.mape(y[ind_test], yhat)
# Call form of print with a single argument behaves the same on Python 2 and 3.
print('RMSE encontrado: {:3.2f}\nMAPE encontrado: {:3.2f}'.format(rmse, mape))

# No plt.hold(True) between the two plots: pyplot.hold was removed in
# Matplotlib 3.0, and successive plot() calls draw on the same axes by default.
plt.plot(y[ind_test])
plt.plot(yhat)
plt.legend(['real','estimado'])
plt.show()

# c) Try to extract the decision rules from the constructed tree.
Ejemplo n.º 35
0
plt.title('Polinomio original')
plt.ylabel('y')
plt.xlabel('x')
# No plt.hold(True) here: pyplot.hold was removed in Matplotlib 3.0, and
# successive plot() calls draw on the same axes by default.
# `n // 2` keeps the slice bound an integer on Python 3 as well as Python 2.
plt.plot(x[n // 2:, :], yhat)
plt.savefig('./bases/results/polinomio_estimado')
plt.clf()

print('Letra A')
print('Polinomio encontrado: ')
print('y = {:3.3f} + {:3.3f}x {: 3.3f}x^2\n'.format(what[0][0], what[1][0], what[2][0]))

# b) Compute the RMSE and MAPE of the fitted model on the second half of the
# data;
print('Letra B')
rmse = utils.rmse(y[n // 2:, :], yhat)
print('RMSE = ' + str(rmse) + '\n')
mape = utils.mape(y[n // 2:, :], yhat)
print('MAPE = ' + str(mape) + '\n')

# c) Estimate the model that best fits the data using all of the data.
# Report the parameters of the model found. Use the model-complexity
# determination factors to help find the model. Compute the RMSE and MAPE
# of the resulting model on the data.
print('Letra C')
MAXDEGREE = 5
# NOTE(review): bare attribute reference with no effect; `plt.figure()` was
# probably intended -- confirm before changing, since that would open a new
# figure instead of reusing the one cleared above.
plt.Figure
plt.grid(True)
plt.plot(x, y)
plt.title('Ajuste Polinomial')
Ejemplo n.º 36
0
    # Buffer for the gradient-boosting predictions, same shape as `y`; each
    # fold below fills in the entries at its validation indices.
    # `y_pred_rf` is used the same way and is presumably allocated just before
    # this point (not visible here).
    y_pred_gb = np.zeros(y.shape)
    # NOTE(review): `KFold(n_samples, n_folds=5)` looks like the legacy
    # scikit-learn cross-validation signature, iterated directly for index
    # pairs -- confirm against the import, which is not visible here.
    for trn_idx, val_idx in KFold(X.shape[0], n_folds=5):
        # split training data
        X_trn, X_tst, y_trn, y_tst = X[trn_idx,:], X[val_idx,:], y[trn_idx], y[val_idx] 
    
        # Initialize the famous Random Forest Regressor from scikit-learn
        clf = RandomForestRegressor(n_estimators=50, n_jobs=4, random_state=23)
        clf.fit(X_trn, y_trn)
        y_pred_rf[val_idx] = clf.predict(X_tst)
    
        # or the Gradient Boosting Regressor
        # (`clf` is rebound; the random-forest model is not kept)
        clf = GradientBoostingRegressor(n_estimators=200, max_depth=3, random_state=23)
        clf.fit(X_trn, y_trn)
        y_pred_gb[val_idx] = clf.predict(X_tst)
        
        # Per-fold error of both models on this fold's validation rows.
        print('  Score RFR/GBR: %.4f, %.4f' % (rmse(y_tst, y_pred_rf[val_idx]), 
                                               rmse(y_tst, y_pred_gb[val_idx])))


    # save prediction result to file
    # Errors over all rows, using the predictions gathered across the folds.
    err_rf = rmse(y, y_pred_rf)
    err_gb = rmse(y, y_pred_gb)
    # Result key: the file name with the 'train_pp_' and '.csv' substrings removed.
    id_ = filename.replace('train_pp_','').replace('.csv','')
    # `th1` is defined outside this excerpt; it is stored unchanged under 'd_th'.
    res[id_] = {'size':X.shape[0], 'd_th':th1, 'rf':err_rf, 'gb':err_gb}

    print('Total Score: %.4f, %.4f' % (err_rf, err_gb))


# Persist the `res` mapping with the highest pickle protocol available.
with open('training_result_TVT.pkl', 'wb') as result_file:
    pickle.dump(res, result_file, pickle.HIGHEST_PROTOCOL)