def predict(self, train_x, train_y, test_x, parameter):
     # self.fit(parameter,train_x,train_y)
     # return self.clf.predict(train_x),self.clf.predict(test_x)
     self.fit(parameter, train_x, np.log1p(train_y))
     train_predict = np.expm1(self.clf.predict(train_x))
     test_predict = np.expm1(self.clf.predict(test_x))
     return train_predict, test_predict
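The model above is fit on np.log1p(train_y) and its predictions are mapped back with np.expm1; a minimal sketch of that round trip (the array values below are made up for illustration):

import numpy as np

y = np.array([0.0, 9.0, 99.0, 1e6])   # hypothetical target values
y_log = np.log1p(y)                    # transform applied before fitting
y_back = np.expm1(y_log)               # inverse applied to the predictions
assert np.allclose(y_back, y)          # expm1 undoes log1p (up to rounding)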
Example #2
def _gpinv(p, k, sigma):
    """Inverse Generalized Pareto distribution function"""
    x = np.full_like(p, np.nan)
    if sigma <= 0:
        return x
    ok = (p > 0) & (p < 1)
    if np.all(ok):
        if np.abs(k) < np.finfo(float).eps:
            x = - np.log1p(-p)
        else:
            x = np.expm1(-k * np.log1p(-p)) / k
        x *= sigma
    else:
        if np.abs(k) < np.finfo(float).eps:
            x[ok] = - np.log1p(-p[ok])
        else:
            x[ok] = np.expm1(-k * np.log1p(-p[ok])) / k
        x *= sigma
        x[p == 0] = 0
        if k >= 0:
            x[p == 1] = np.inf
        else:
            x[p == 1] = - sigma / k

    return x
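A minimal usage sketch for _gpinv, assuming the function above and numpy imported as np; the shape and scale values are illustrative only:

import numpy as np

p = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
q = _gpinv(p, k=0.1, sigma=2.0)
print(q)   # q[0] == 0, q[-1] == inf (since k >= 0), interior entries are finite quantiles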
Example #3
def hyperbolic_ratio(a, b, sa, sb):
    '''
    Return ratio of hyperbolic functions
          to allow extreme variations of arguments.
    
    Parameters
    ----------       
    a, b : array-like
        arguments vectors of the same size
    sa, sb : scalar integers
        defining the hyperbolic function used, i.e., f(x,1)=cosh(x), f(x,-1)=sinh(x)
        
    Returns
    -------
    r : ndarray
        f(a,sa)/f(b,sb), ratio of hyperbolic functions of same
                    size as a and b
     Examples
     --------
     >>> x = [-2,0,2]
     >>> hyperbolic_ratio(x,1,1,1)   # gives r=cosh(x)/cosh(1)
     array([ 2.438107  ,  0.64805427,  2.438107  ])
     >>> hyperbolic_ratio(x,1,1,-1)  # gives r=cosh(x)/sinh(1)
     array([ 3.20132052,  0.85091813,  3.20132052])
     >>> hyperbolic_ratio(x,1,-1,1)  # gives r=sinh(x)/cosh(1)
     array([-2.35040239,  0.        ,  2.35040239])
     >>> hyperbolic_ratio(x,1,-1,-1) # gives r=sinh(x)/sinh(1)
     array([-3.08616127,  0.        ,  3.08616127])
     >>> hyperbolic_ratio(1,x,1,1)   # gives r=cosh(1)/cosh(x)
     array([ 0.41015427,  1.54308063,  0.41015427])
     >>> hyperbolic_ratio(1,x,1,-1)  # gives r=cosh(1)/sinh(x)
     array([-0.42545906,         inf,  0.42545906])
     >>> hyperbolic_ratio(1,x,-1,1)  # gives r=sinh(1)/cosh(x)
     array([ 0.3123711 ,  1.17520119,  0.3123711 ])
     >>> hyperbolic_ratio(1,x,-1,-1) # gives r=sinh(1)/sinh(x)
     array([-0.32402714,         inf,  0.32402714])
     
     See also  
     --------
     tran
    '''

    ak, bk, sak, sbk = np.atleast_1d(a, b, np.sign(sa), np.sign(sb))
    # old call
    #return exp(ak-bk)*(1+sak*exp(-2*ak))/(1+sbk*exp(-2*bk))
    # TODO: Does not always handle division by zero correctly

    signRatio = np.where(sak * ak < 0, sak, 1)
    signRatio = np.where(sbk * bk < 0, sbk * signRatio, signRatio)    
    
    bk = np.abs(bk)
    ak = np.abs(ak)
     
    num = np.where(sak < 0, expm1(-2 * ak), 1 + exp(-2 * ak))
    den = np.where(sbk < 0, expm1(-2 * bk), 1 + exp(-2 * bk))
    iden = np.ones(den.shape) * inf
    ind = np.flatnonzero(den != 0)
    iden.flat[ind] = 1.0 / den[ind]
    val = np.where(num == den, 1, num * iden)
    return signRatio * exp(ak - bk) * val #((sak+exp(-2*ak))/(sbk+exp(-2*bk)))
def numpy_sweep(start_frequency=20.0,
                stop_frequency=20000.0,
                phase=0.0,
                interval=(0, 1.0),
                sampling_rate=48000.0,
                length=2 ** 16):
    """A pure NumPy implementation of the ExponentialSweep for benchmarking.
    See the ExponentialSweep class for documentation of the parameters.
    """
    # allocate shared memory for the channels
    array = sharedctypes.RawArray(ctypes.c_double, length)
    channels = numpy.frombuffer(array, dtype=numpy.float64).reshape((1, length))
    # generate the sweep
    start, stop = sumpf_internal.index(interval, length)
    sweep_offset = float(start / sampling_rate)
    sweep_duration = (stop - start) / sampling_rate
    frequency_ratio = stop_frequency / start_frequency
    l = sweep_duration / math.log(frequency_ratio)
    a = 2.0 * math.pi * start_frequency * l
    t = numpy.linspace(-sweep_offset, (length - 1) / sampling_rate - sweep_offset, length)
    array = t
    array /= l
    numpy.expm1(array, out=array)
    array *= a
    array += phase
    numpy.sin(array, out=channels[0, :])
    # fake store some additional values, because these values are actually stored in the constructor of the sweep
    _ = start_frequency * frequency_ratio ** (-sweep_offset / sweep_duration)                       # noqa: F841
    _ = start_frequency * frequency_ratio ** ((sweep_duration - sweep_offset) / sweep_duration)     # noqa: F841
    return sumpf.Signal(channels=channels, sampling_rate=sampling_rate, offset=0, labels=("Sweep",))
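A stripped-down sketch of the same sweep math without the shared-memory and sumpf bookkeeping (plain_sweep is a hypothetical helper, not part of the library): the phase 2*pi*f0*l*(exp(t/l) - 1) sweeps the instantaneous frequency exponentially from the start to the stop frequency.

import math
import numpy as np

def plain_sweep(f0=20.0, f1=20000.0, sampling_rate=48000.0, length=2**16, phase=0.0):
    duration = length / sampling_rate
    l = duration / math.log(f1 / f0)             # sweep rate constant
    a = 2.0 * math.pi * f0 * l
    t = np.arange(length) / sampling_rate
    return np.sin(a * np.expm1(t / l) + phase)   # derivative of the phase runs from f0 up to f1
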
def myRMSPE_xg(yhat,y):
    
    y = np.expm1(y.get_label())
    yhat = np.expm1(yhat)
    r=myRMSPE(yhat,y)
    
    return "rmspe", r
Example #6
 def updateParams(self):
     self.pop.sort(key=op.attrgetter('f'))
     self.pSigma = np.dot(1.0 - self.cSigma, self.pSigma) + np.dot(
         np.sqrt(self.cSigma * (2.0 - self.cSigma) * self.muEff),
         sum(np.dot(self.rankWeight[i], self.pop[i].z) for i in range(self.popsize)))
     rate = np.linalg.norm(self.pSigma) / self.expectationChiDistribution
     if rate >= 1.0 :
         wsum = 0
         for i in range(self.popsize):
             self.weight[i] = self.hatWeight[i] * np.expm1(self.alpha * np.linalg.norm(self.pop[i].z) + 1.0)
             wsum += self.weight[i]
         for i in range(self.popsize):
             self.weight[i] = self.weight[i] / wsum - 1.0 / self.popsize
     else:
         self.weight = self.rankWeight
     if rate >= 1.0:
         self.etaB = self.etaBMove
         self.etaSigma = self.etaSigmaMove
     elif rate >= 0.1:
         self.etaB = self.etaBStag
         self.etaSigma = self.etaSigmaStag
     else:
         self.etaB = self.etaBConv
         self.etaSigma = self.etaSigmaConv
     GDelta = sum(np.dot(self.weight[i], self.pop[i].z) for i in range(self.popsize))
     GMu = sum(self.weight[i] * (np.outer(self.pop[i].z, self.pop[i].z) - np.eye(self.dim)) for i in range(self.popsize))
     GSigma = np.trace(GMu) / self.dim
     GB = GMu - GSigma * np.eye(self.dim)
     self.mu += self.etaMu * self.sigma * np.dot(self.B, GDelta)
     self.sigma *= (np.expm1(0.5 * self.etaSigma * GSigma) + 1.0)
     self.B = np.dot(self.B, linalg.expm(0.5 * self.etaB * GB))  # scipy.linalg.expm; the old expm3 was removed from SciPy
Example #7
def Ridge_model(train_linear, test_linear):
    ridgecv = RidgeCV(alphas = np.logspace(-5, 4, 400))
    ridgecv.fit(train_linear_fea, train_linear_tar)
    ridgecv_score = ridgecv.score(train_linear_fea, train_linear_tar)
    ridgecv_alpha = ridgecv.alpha_
    print("Best alpha : ", ridgecv_alpha, "Score: ",ridgecv_score)
    coef=pd.Series(ridgecv.coef_, index=x_train.columns).sort_values(ascending =False)
    
    start=time.time()
    ridge =Ridge(normalize = True)
    ridge.set_params(alpha=ridgecv_alpha,max_iter = 10000)
    #ridge.set_params(alpha=6,max_iter = 10000)
    ridge.fit(x_train, y_train)
    end=time.time()
    mean_squared_error(y_test, ridge.predict(x_test))
    coef_ridge=pd.Series(ridgecv.coef_, index=x_train.columns).sort_values(ascending =False)
    evaluate(ridge,x_test,y_test,x_train,y_train)
    print('Time elapsed: %.4f seconds' % (end-start))
    
    y_ridge_predict=ridge.predict(train_linear_fea)
    x_line = np.arange(700000)
    y_line=x_line
    plt.scatter(real_train_tar,np.expm1(y_ridge_predict))
    plt.plot(x_line, y_line, color='r')
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    
    test_prediction_ridge=np.expm1(ridge.predict(test_linear))
    write_pkl(ridgecv_alpha, '/Users/vickywinter/Documents/NYC/Machine Learning Proj/Pickle/ridge_params.pkl')
    return test_prediction_ridge
    
    
def _frank(M, N, alpha):
    if(N<2):
        raise ValueError('Dimensionality Argument [N] must be an integer >= 2')
    elif(N==2):        
        u1 = uniform.rvs(size=M)
        p = uniform.rvs(size=M)
        if abs(alpha) > math.log(sys.float_info.max):
            u2 = (u1 < 0).astype(int) + np.sign(alpha)*u1  # u1 or 1-u1
        elif abs(alpha) > math.sqrt(np.spacing(1)):
            u2 = -1*np.log((np.exp(-alpha*u1)*(1-p)/p + np.exp(-alpha))/(1 + np.exp(-alpha*u1)*(1-p)/p))/alpha
        else:
            u2 = p
        
        U = np.column_stack((u1,u2))
    else:
        # Algorithm 1 described in both the SAS Copula Procedure, as well as the
        # paper: "High Dimensional Archimedean Copula Generation Algorithm"
        if(alpha<=0):
            raise ValueError('For N>=3, alpha >0 in Frank Copula')
            
        U = np.empty((M,N))
        for ii in range(0,M):
            p = -1.0*np.expm1(-1*alpha)
            if(p==1):
                # boundary case protection
                p = 1 - np.spacing(1)
            v = logser.rvs(p, size=1)
            
            # sample N independent uniform random variables
            x_i = uniform.rvs(size=N)
            t = -1*np.log(x_i)/v
            U[ii,:] = -1.0*np.log1p( np.exp(-t)*np.expm1(-1.0*alpha))/alpha
            
    return U
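An illustrative call to _frank, assuming the function above together with its imports (numpy as np, math, sys, and scipy.stats' uniform and logser); M, N and alpha are arbitrary example values:

U = _frank(M=1000, N=3, alpha=2.0)
print(U.shape)            # (1000, 3); each column is approximately Uniform(0, 1)
print(U.min(), U.max())   # samples stay inside the unit interval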
Example #9
    def __init__(self, daily_returns, benchmark_daily_returns, risk_free_rate, days, period=DAILY):
        assert(len(daily_returns) == len(benchmark_daily_returns))

        self._portfolio = daily_returns
        self._benchmark = benchmark_daily_returns
        self._risk_free_rate = risk_free_rate
        self._annual_factor = _annual_factor(period)
        self._daily_risk_free_rate = self._risk_free_rate / self._annual_factor

        self._alpha = None
        self._beta = None
        self._sharpe = None
        self._return = np.expm1(np.log1p(self._portfolio).sum())
        self._annual_return = (1 + self._return) ** (365 / days) - 1
        self._benchmark_return = np.expm1(np.log1p(self._benchmark).sum())
        self._benchmark_annual_return = (1 + self._benchmark_return) ** (365 / days) - 1
        self._max_drawdown = None
        self._volatility = None
        self._annual_volatility = None
        self._benchmark_volatility = None
        self._benchmark_annual_volatility = None
        self._information_ratio = None
        self._sortino = None
        self._tracking_error = None
        self._annual_tracking_error = None
        self._downside_risk = None
        self._annual_downside_risk = None
        self._calmar = None
        self._avg_excess_return = None
def rmspe_xg(y_hat, y):
    y = np.expm1(y.get_label())
    w = ToWeight(y)
    y_hat = np.expm1(y_hat)
    score = np.sqrt(np.mean(((y - y_hat) * w) ** 2))

    return "rmspe", score
Example #11
    def test_write_subregion_to_file(
        self, machine_timestep, dt, size_in, tau_ref, tau_rc, size_out, probe_spikes, vertex_slice, vertex_neurons
    ):
        # Check that the region is correctly written to file
        region = lif.SystemRegion(size_in, size_out, machine_timestep, tau_ref, tau_rc, dt, probe_spikes)

        # Create the file
        fp = tempfile.TemporaryFile()

        # Write to it
        region.write_subregion_to_file(fp, vertex_slice)

        # Read back and check that the values are sane
        fp.seek(0)
        values = fp.read()
        assert len(values) == region.sizeof()

        (n_in, n_out, n_n, m_t, t_ref, dt_over_t_rc, rec_spikes, i_dims) = struct.unpack_from("<8I", values)
        assert n_in == size_in
        assert n_out == size_out
        assert n_n == vertex_neurons
        assert m_t == machine_timestep
        assert t_ref == int(tau_ref // dt)
        assert (
            tp.value_to_fix(-np.expm1(-dt / tau_rc)) * 0.9
            < dt_over_t_rc
            < tp.value_to_fix(-np.expm1(-dt / tau_rc)) * 1.1
        )
        assert (probe_spikes and rec_spikes != 0) or (not probe_spikes and rec_spikes == 0)
        assert i_dims == 1
    def predict(self, train_x, train_y, test_x, parameter, times=5, validation_indexs=None, type='regression'):
        print parameter['model'] + " predict starting"

        train_preds = np.zeros((times, len(train_x)))
        test_preds = np.zeros((times, len(test_x)))
        for time in xrange(times):
            validation_indexs = genIndexKFold(train_x, 10)
            test_pred = np.zeros((len(validation_indexs), len(test_x)))
            train_pred = np.zeros((len(train_x)))

            for i, (train_ind, test_ind) in enumerate(validation_indexs):
                clf = model_select(parameter)
                print "Fold", i
                X_train = train_x[train_ind]
                Y_train = np.log1p(train_y[train_ind])
                X_test = train_x[test_ind]
                Y_test = train_y[test_ind]

                clf.fit(X_train, Y_train)
                test_pred[i][:] = np.expm1(clf.predict(test_x))
                train_pred[test_ind] = np.expm1(clf.predict(X_test))
                print evaluation_functions.evaluate_function(Y_test, train_pred[test_ind], 'rmsle')
            train_preds[time] = train_pred
            test_preds[time] = np.mean(test_pred, axis=0)

        return np.mean(train_preds, axis=0), np.mean(test_preds, axis=0)
Example #13
def expm1(a, b):
    print((numba.typeof(a)))
    print((numba.typeof(np.expm1(a))))
#    result = a**2 + b**2
#    print "... :)"
#    print np.expm1(result), "..."
    return np.expm1(a**2) + b
 def inverse_transform(self, X):
   if self.columns:
     for column in self.columns:
       X[column] = np.expm1(X[column])
     # return only after all listed columns have been inverse-transformed
     return X
   else:
     return np.expm1(X)
Example #15
def output(modelObj):
    test = modelObj.test
    file = modelObj.outputFile
    model = modelObj.model

    # remove id column
    
    test['label'] = test['label'].astype(int)
    
    

    week10 = test[test['Semana']==10]
    



    week11 = test[test['Semana']==11]
    
    week10['pred'] = np.expm1(model.predict(week10.values[:,:-1]))
    file.write('id,Demanda_uni_equil\n')
    temp = week10[['label', 'pred']]
    temp.to_csv(file, index=False, sep=',', header=False)
    '''
    week10['Semana'] = week10['Semana'] + 1

    
    week10 = week10[['Cliente_ID', 'Producto_ID', 'Semana', 'pred']]

    week10 = week10.groupby(by=['Cliente_ID', 'Producto_ID', 'Semana'], as_index=False).mean()

    

    week11 = pd.merge(week11, week10, on=['Cliente_ID', 'Producto_ID', 'Semana'], how='left')
    week11['l1'] = week11['pred']
    del week11['pred']
    
    


    temp = week11[['l1','l2','l3','l4','l5']]
    temp = temp.fillna(0)

    week11['lagVar'] = np.var(temp, axis=1)
    week11['newProduct'] = np.sum(temp, axis=1) == 0

    week11['newProduct'].replace(False, 0, inplace=True)
    week11['newProduct'].replace(True, 1, inplace=True)
    '''

    #week11['lagSum'] = week11['l1'] + week11['l2'] + week11['l3'] + week11['l4'] + week11['l5']
    #week11['lagAvg'] = week11['lagSum'] / 5

    week11['pred'] = np.expm1(model.predict(week11.values[:,:-1]))

    temp = week11[['label', 'pred']]
    temp.to_csv(file, index=False, sep=',', header=False)

    file.flush()

    return test.shape[0]
    def predict(self, train_x, train_y, test_x, parameter, times=1, validation_indexs=None, type='regression'):
        print parameter['model'] + " predict starting"

        train_preds = np.zeros((times, len(train_x)))
        test_preds = np.zeros((times, len(test_x)))
        for time in xrange(times):
            logging.info("time {}".format(str(time)))
            validation_indexs = genIndexKFold(train_x, 5)
            test_pred = np.zeros((len(validation_indexs), len(test_x)))
            train_pred = np.zeros((len(train_x)))

            for i, (train_ind, test_ind) in enumerate(validation_indexs):
                clf = model_select(parameter)
                logging.info("start time:{} Fold:{}".format(str(time), str(i)))
                print "start time:{} Fold:{}".format(str(time), str(i))
                X_train = train_x[train_ind]
                Y_train = np.log1p(train_y[train_ind])
                X_test = train_x[test_ind]
                Y_test = train_y[test_ind]

                clf.fit(X_train, Y_train)
                test_pred[i][:] = np.expm1(clf.predict(test_x))
                train_pred[test_ind] = np.expm1(clf.predict(X_test))
                evaluation = evaluate_function(
                    Y_test, train_pred[test_ind], 'rmsle')
                logging.info("time:{} Fold:{} evaluation:{}".format(
                    str(time), str(i), str(evaluation)))
            train_preds[time] = train_pred
            test_preds[time] = np.mean(test_pred, axis=0)
            print train_preds, test_preds

        return np.mean(train_preds, axis=0), np.mean(test_preds, axis=0)
	def predict(self,trains_x,train_y,tests_x,parameters,times=10,isFile=True,foldername="blend-dir"):
		"""
		Ensemble many feature sets and regressors.

		:param trains_x: dictionary of training feature matrices
		:param train_y: training target vector
		"""
		#parameter_get
		test_data_sample = tests_x.values()[0]

		if not os.path.exists(foldername):
			os.makedirs(foldername)

		skf = None
		kfold_file = foldername + "/kfold_index.pkl"
		if os.path.exists(kfold_file):
			skf = pickle.load(open(kfold_file,"r"))
		else:
			skf = KFold(n=len(train_y),n_folds=times,shuffle=True)
			pickle.dump(skf,open(kfold_file,"w"))

		blend_train = np.zeros((len(train_y),len(parameters)))
		blend_test = np.zeros((len(test_data_sample),len(parameters)))

		for j,parameter in enumerate(parameters):
			train_x = trains_x[parameter['data']]
			test_x = tests_x[parameter['data']]

			blend_test_tmp = np.zeros((len(test_data_sample),len(parameters)))

			#file path check
			for i, (train_index,valid_index) in enumerate(skf):
				clf = model_select(parameter['parameter'])

				train = train_x[train_index]
				train_valid_y = train_y[train_index]

				kfold_filepath = "./" + foldername + "/parameter_{}_kfold_{}.pkl".format(j,i)

				if os.path.exists(kfold_filepath):
					# reuse the cached fold predictions; they are assigned to blend_train / blend_test_tmp below
					blend_train_prediction,blend_test_prediction = pickle.load(open(kfold_filepath,"r"))
				else:
					clf.fit(train,np.log1p(train_valid_y))
					blend_train_prediction = np.expm1(clf.predict(train))
					blend_test_prediction = np.expm1(clf.predict(test_x))
					pickle.dump((blend_train_prediction,blend_test_prediction),open(kfold_filepath,"w"))

				blend_train[train_index,j] = blend_train_prediction
				blend_test_tmp[:,i] = blend_test_prediction
			blend_test[:,j] = blend_test_tmp.mean(1)

		#Blending Model
		bclf = LassoCV(n_alphas=100, alphas=None, normalize=True, cv=5, fit_intercept=True, max_iter=10000, positive=True)
		bclf.fit(blend_train, train_y)
		y_test_predict = bclf.predict(blend_test)

		return y_test_predict
Example #18
 def testBijectiveAndFinite(self):
   bijector = tfb.Weibull(scale=20., concentration=2., validate_args=True)
   x = np.linspace(1., 8., num=10).astype(np.float32)
   y = np.linspace(
       -np.expm1(-1 / 400.),
       -np.expm1(-16), num=10).astype(np.float32)
   bijector_test_util.assert_bijective_and_finite(
       bijector, x, y, eval_func=self.evaluate, event_ndims=0, rtol=1e-3)
def merge_predict(model1, model2, test_data):
#    Combine the predictions of two separately trained models.
#    Both input models predict in the log domain; the combined prediction is
#    returned in the original domain (via expm1).
    p1 = np.expm1(model1.predict(test_data))
    p2 = np.expm1(model2.predict(test_data))
    p_total = (p1+p2)
    return(p_total)
 def test_lasagne_regression(self):
     x, y = self.make_data_set()
     print len(x), y
     neural_network = mlc.model.LasagneNeuralNetwork.NeuralNetwork(
         problem_type="regression", batch_size=100, epochs=1000, layer_number=[100, 100, 100], dropout_layer=[0.0, 0.0, 0.0])
     neural_network.fit(x, np.log1p(y), valid=True,
                        evaluate_function="mean_squared_loss")
     print np.expm1(neural_network.predict(x))
Example #21
def process_xgb():
    col, train, test, test_ref = load_data()
    print(train.shape, test.shape, test_ref.shape)

    params = {
        'colsample_bytree': 0.055,
        'colsample_bylevel': 0.4,
        'gamma': 1.5,
        'learning_rate': 0.01,
        'max_depth': 5,
        'objective': 'reg:linear',
        'booster': 'gbtree',
        'min_child_weight': 10,
        'n_estimators': 1800,
        'reg_alpha': 0,
        'reg_lambda': 0,
        'eval_metric': 'rmse',
        'subsample': 0.7,
        'silent': True,
        'seed': 7,
    }
    folds = 20
    full_score = 0.0
    xg_test = xgb.DMatrix(test[col])
    use_regressor = True
    use_regressor = False
    for fold in range(folds):
        x1, x2, y1, y2 = model_selection.train_test_split(train[col], np.log1p(train.target.values), test_size=0.0010, random_state=fold)

        if use_regressor:
            p = params
            model = xgb.XGBRegressor(colsample_bytree=p['colsample_bytree'], colsample_bylevel=p['colsample_bylevel'], gamma=p['gamma'], learning_rate=p['learning_rate'], max_depth=p['max_depth'], objective=p['objective'], booster=p['booster'], min_child_weight=p['min_child_weight'], n_estimators=p['n_estimators'], reg_alpha=p['reg_alpha'], reg_lambda=p['reg_lambda'], eval_metric=p['eval_metric'] , subsample=p['subsample'], silent=1, n_jobs = -1, early_stopping_rounds = 100, random_state=7, nthread=-1)
            model.fit(x1, y1)
            score = np.sqrt(mean_squared_error(y2, model.predict(x2)))
            test['target'] += np.expm1(model.predict(test[col]))
        else:
            xg_valid = xgb.DMatrix(x2, label=y2)
            xg_train = xgb.DMatrix(x1, label=y1)
            model = xgb.train(params, xg_train, params['n_estimators'])
            score = np.sqrt(mean_squared_error(y2, model.predict(xg_valid)))
            test['target'] += np.expm1(model.predict(xg_test))

        print('Fold', fold, 'Score', score)
        full_score += score

    full_score /= folds
    print('Full score', full_score)

    test['target'] /= folds

    test.loc[test_ref.target > 0, 'target'] = test_ref[test_ref.target > 0].target.values

    test[['ID', 'target']].to_csv('subxgb.csv', index=False)

    explain=False
    #explain=True
    if explain and not use_regressor:
        print(eli5.format_as_text(eli5.explain_weights(model, top=200)))
Example #22
def expm1(x):
    """
    Calculate exp(x) - 1
    """
    if isinstance(x, UncertainFunction):
        mcpts = np.expm1(x._mcpts)
        return UncertainFunction(mcpts)
    else:
        return np.expm1(x)
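A quick numeric illustration of why these snippets use np.expm1 rather than np.exp(x) - 1: near zero the naive form loses most of its significant digits to cancellation.

import numpy as np

x = 1e-12
print(np.exp(x) - 1.0)   # ~1.000089e-12, only about five correct digits
print(np.expm1(x))       # ~1.0000000000005e-12, accurate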
Example #23
 def testBijectiveAndFinite(self):
   with self.cached_session():
     bijector = Weibull(
         scale=20., concentration=2., validate_args=True)
     x = np.linspace(1., 8., num=10).astype(np.float32)
     y = np.linspace(
         -np.expm1(-1 / 400.),
         -np.expm1(-16), num=10).astype(np.float32)
     assert_bijective_and_finite(bijector, x, y, event_ndims=0, rtol=1e-3)
def root_mean_squared_percetage_error(y_true, y_pred):
    """
    Root mean squared percentage error (weighted) regression loss
    """
    y_true = np.expm1(y_true)
    y_pred = np.expm1(y_pred)
    w = ToWeight(y_true)
    output_errors = np.mean(((y_true - y_pred) * w) ** 2)

    return float(np.sqrt(output_errors))
def predictsale(request):
    if 'dt' in request.POST:
        path1 = default_storage.open('mysite\\train.csv')
        path2 = default_storage.open('mysite\\test.csv')

        train_data = pd.read_csv(path1, parse_dates=[0])
        test_data = pd.read_csv(path2, parse_dates=[0])
    
        dt = request.POST['dt']
        d = parse(dt)
        test_data['day'] = d.day
        test_data['month'] = d.month
        test_data['year'] = d.year
        test_data['hour'] = d.hour
        test_data['season'] = int(request.POST['season'])
    
        test_data['temp'] = float(request.POST['temp'])
        test_data['atemp'] = float(request.POST['atemp'])
        test_data['humidity'] = int(request.POST['humidity'])
        test_data['windspeed'] = float(request.POST['windspeed'])
    
        weather_condition = request.POST['weather']
        if weather_condition=='Clear' or weather_condition=='Partly Cloudy' or weather_condition=='Very Hot':
            test_data['weather'] = 1
        if weather_condition=='Mostly Cloudy' or weather_condition=='Cloudy' or weather_condition=='Hazy' or weather_condition=='Chance of Showers' or weather_condition=='Chance of Rain' or weather_condition=='Chance of Showers' :
            test_data['weather'] = 2
        if weather_condition=='Very Cold' or weather_condition=='Showers' or weather_condition=='Rain' or weather_condition=='Chance of a Thunderstorm' or weather_condition=='Flurries' or weather_condition=='Chance of Snow Showers' or weather_condition=='Snow Showers' or weather_condition=='Chance of Snow':
            test_data['weather'] = 3
        if weather_condition=='Foggy' or weather_condition=='Blowing Snow' or weather_condition=='Thunderstorm' or weather_condition=='Snow' or weather_condition=='Ice Pellets' or weather_condition=='Chance of Ice Pellets' or weather_condition=='Blizzard':
            test_data['weather'] = 4
    
   
        dt_train = pd.DatetimeIndex(train_data['datetime'])
        train_data['year'] = dt_train.year
        train_data['month']= dt_train.month
        train_data['hour'] = dt_train.hour
        train_data['day'] = dt_train.day
    
        for colum in ['casual', 'registered', 'count']:
            train_data['log-' + colum] = train_data[colum].apply(lambda x: np.log1p(x))
    
        attrib = ['year','month', 'day', 'hour','season', 'weather','temp', 'atemp', 'humidity', 'windspeed']
     
        # fit a separate model for each target; reusing a single estimator would
        # make both predictions come from the last fit (registered)
        gbr_casual = ensemble.GradientBoostingRegressor(n_estimators=80, learning_rate=.05, max_depth=10, min_samples_leaf=20)
        gbr_registered = ensemble.GradientBoostingRegressor(n_estimators=80, learning_rate=.05, max_depth=10, min_samples_leaf=20)

        casual_pred = gbr_casual.fit(train_data[attrib].values, train_data['log-casual'].values)
        registered_pred = gbr_registered.fit(train_data[attrib].values, train_data['log-registered'].values)
        total = np.expm1(casual_pred.predict(test_data[attrib])) + np.expm1(registered_pred.predict(test_data[attrib]))
        
        print("sale :",total)
        return render(request, 'predictsale.html', {'total_sale': int(total[0]), 'date': dt})

    else:    
        return render(request, 'predictsale.html')
Example #26
def xgboost_validset_submission():
    params = {"objective": "reg:linear",
          "eta": 0.3,
          "max_depth": 10,
          "subsample": 0.7,
          "colsample_bytree": 0.7,
          "silent": 1,
          "seed": 1301
    }
    num_boost_round = 300
    # need to split for a small validation set
    X_train_xgb, X_valid_xgb = train_test_split(train, test_size=0.012)
    y_train_xgb = np.log1p(X_train_xgb.Sales)
    y_valid_xgb = np.log1p(X_valid_xgb.Sales)
    dtrain = xgb.DMatrix(X_train_xgb[feature_names], y_train_xgb)
    dvalid = xgb.DMatrix(X_valid_xgb[feature_names], y_valid_xgb)

    watchlist = [(dvalid, 'eval'), (dtrain, 'train')]
    gbm = xgb.train(params, dtrain, num_boost_round, evals=watchlist,
                    early_stopping_rounds=100,
                    feval=c.rmspe_xg, verbose_eval=True)

    print("Validating")
    y_pred = gbm.predict(xgb.DMatrix(X_valid_xgb[feature_names]))
    error = c.rmspe(X_valid_xgb.Sales.values, np.expm1(y_pred))
    print('RMSPE: {:.6f}'.format(error))

    print("Make predictions on the test set")
    dtest = xgb.DMatrix(test[feature_names])
    test_probs = gbm.predict(dtest)
    # Make Submission
    result = pd.DataFrame({"Id": test["Id"],
                           'Sales': np.expm1(test_probs)})
    result.to_csv("xgboost_10_submission.csv", index=False)

    # XGB feature importances
    # Based on https://www.kaggle.com/mmueller/
    # liberty-mutual-group-property-inspection-prediction/
    # xgb-feature-importance-python/code

    ceate_feature_map(feature_names)
    importance = gbm.get_fscore(fmap='xgb.fmap')
    importance = sorted(importance.items(), key=operator.itemgetter(1))

    df = pd.DataFrame(importance, columns=['feature', 'fscore'])
    df['fscore'] = df['fscore'] / df['fscore'].sum()

    featp = df.plot(kind='barh', x='feature', y='fscore',
                    legend=False, figsize=(6, 10))
    plt.title('XGBoost Feature Importance')
    plt.xlabel('relative importance')
    fig_featp = featp.get_figure()
    fig_featp.savefig('feature_importance_xgb.png',
                      bbox_inches='tight', pad_inches=1)
Example #27
def xgbFull(X_train,y_train,X_test):
    
    params = {}
    
    params["objective"] = "reg:linear"
    params["eta"] = 0.02
    params["min_child_weight"] = 6
    params["subsample"] = 0.7
    params["scale_pos_weight"] = 0.8
    params["silent"] = 1
    params["max_depth"] = 8
    params["max_delta_step"]=2
    plst = list(params.items())
    
    
    xgtest = xgb.DMatrix(X_test)
    y1 = np.log1p(y_train)
    y2 = np.power(y_train,1/16.0)
    
    num_rounds = 1000
    print(num_rounds)
    xgtrain = xgb.DMatrix(X_train,label=y1)
    m1 = xgb.train(plst,xgtrain,num_rounds)
    p1 = m1.predict(xgtest)
    p1 = np.expm1(p1)
    
    num_rounds = 2000
    print(num_rounds)
  
    xgtrain = xgb.DMatrix(X_train,label=y2)
    m2 = xgb.train(plst,xgtrain,num_rounds)
    p2 = m2.predict(xgtest)
    p2 = np.power(p2,16.0)
    
    num_rounds = 3000
    print(num_rounds)
    xgtrain = xgb.DMatrix(X_train,label=y1)
    m3 = xgb.train(plst,xgtrain,num_rounds)
    p3 = m3.predict(xgtest)
    p3 = np.expm1(p3)
    
    num_rounds = 4000
    print(num_rounds)
    xgtrain = xgb.DMatrix(X_train,label=y2)
    m4 = xgb.train(plst,xgtrain,num_rounds)
    p4 = m4.predict(xgtest)
    p4 = np.power(p4,16.0)
    
    
   
        
    return p1,p2,p3,p4
def save_predictions_per_store(output_dir, train_set, train_features, valid_set, valid_features, model):
    print ">> SAVING PREDICTIONS PER STORE"
    train = pd.DataFrame(train_set)
    train["PredSales"] = np.expm1(model.predict(xgb.DMatrix(train_features)))
    valid = pd.DataFrame(valid_set)
    valid["PredSales"] = np.expm1(model.predict(xgb.DMatrix(valid_features)))

    train = train.iloc[::-1]
    valid = valid.iloc[::-1]

    for store in train.Store.unique():
        df = train[train.Store == store].append(valid[valid.Store == store])
        output_path = path.join(output_dir, "store_%s.csv" % store)
        df[["Store", "Open", "Promo", "Date", "Sales", "PredSales"]].to_csv(output_path, index=False)
    def fit(self,x_train,y_train):
        batchsize = self.batchsize

        np.random.seed(self.seed)

        if self.cuda:
            cuda.get_device(0).use()
            self.model.to_gpu()
            xp = cuda.cupy
        else:
            xp = np
        self.xp = xp

        if self.split != 0.0:
            x_train_data, x_valid_data, y_train_data, y_valid_data = train_test_split(x_train, y_train, test_size=self.split, random_state=self.seed)
            print "train size:{} test_size:{}".format(len(x_train_data), len(y_valid_data))
            data = np.array(x_train_data,dtype=np.float32)
            valid_data = np.array(x_valid_data,dtype=np.float32)
            target = np.array(y_train_data,dtype=np.float32).reshape((len(data),1))
            valid_target = np.array(y_valid_data,dtype=np.float32).reshape((len(valid_data),1))
        else:
            data = np.array(x_train,dtype=np.float32)
            target = np.array(self.convert(y_train),dtype=np.float32).reshape((len(data),1))

        optimizer = optimizers.Adam()
        optimizer.setup(self.model)
        N = len(data)

        for epoch in xrange(self.epochs):
            print "epoch:",epoch
            perm = np.random.permutation(N)
            sum_loss = 0.0
            sum_original_loss = 0.0

            cnt = 0
            for i in xrange(0,N,batchsize):
                x = chainer.Variable(xp.asarray(data[perm[i:i + batchsize]]),volatile="off")
                t = chainer.Variable(xp.asarray(target[perm[i:i + batchsize]],dtype=np.float32),volatile="off")

                optimizer.update(self.model, x, t)
                sum_original_loss += float(self.model.loss.data) * len(t.data)
                cnt += 1

            if evaluate_function != None:
                prediction = self.predict(valid_data)
                loss = evaluate_function(np.expm1(valid_target),np.expm1(prediction),self.evaluate_function_name)
                sum_loss = loss
            print "original train loss:{}".format(sum_original_loss / N)
            print "train_loss:{}".format(sum_loss)
Example #30
def int_linexp0(a, b, u0, u1, g, x0):
    """ This is the integral in [a, b] of u(x) * exp(g * (x0 - x)) * x 
    assuming that
    u is linear with u({a, b}) = {u0, u1}."""

    # Since u(x) is linear, we calculate separately the coefficients
    # of degree 0 and 1 which, after multiplying by the x in the integrand
    # correspond to 1 and 2

    # The expressions involve the following exponentials that are problematic:
    # expa = np.exp(g * (-a + x0))
    # expb = np.exp(g * (-b + x0))
    # The problems come with small g: in that case, the exp() rounds to 1
    # and neglects the order 1 and 2 terms that are required to cancel the
    # 1/g**2 and 1/g**3 below.  The solution is to rewrite the expressions
    # as functions of expm1(x) = exp(x) - 1, which is guaranteed to be accurate
    # even for small x.
    expm1a = np.expm1(g * (-a + x0))
    expm1b = np.expm1(g * (-b + x0))

    ag = a * g
    bg = b * g

    ag1 = ag + 1
    bg1 = bg + 1

    g2 = g * g
    g3 = g2 * g

    # These are the expressions as functions of expa/expb
    # A1 = (  expa * ag1
    #        - expb * bg1) / g2

    # A2 = (expa * (2 * ag1 + ag * ag) -
    #       expb * (2 * bg1 + bg * bg)) / g3

    A1 = (expm1a * ag1 + ag - expm1b * bg1 - bg) / g2

    A2 = (expm1a * (2 * ag1 + ag * ag) + ag * (ag + 2) - expm1b * (2 * bg1 + bg * bg) - bg * (bg + 2)) / g3

    # The factors multiplying each coefficient can be obtained by
    # the interpolation formula of u(x) = c0 + c1 * x
    c0 = (a * u1 - b * u0) / (a - b)
    c1 = (u0 - u1) / (a - b)

    r = c0 * A1 + c1 * A2

    # Where either F0 or F1 is 0 we return 0
    return np.where(np.isnan(r), 0.0, r)
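A quick numeric check of the comment above: for small g the exp()-based form of A1 is destroyed by cancellation, while the expm1-based form stays accurate. The values of a, b, x0 and g are illustrative; the g -> 0 limit of A1 is (b**2 - a**2) / 2.

import numpy as np

a, b, x0, g = 1.0, 2.0, 0.0, 1e-8
expa, expb = np.exp(g * (x0 - a)), np.exp(g * (x0 - b))
naive_A1 = (expa * (a * g + 1) - expb * (b * g + 1)) / g**2
expm1a, expm1b = np.expm1(g * (x0 - a)), np.expm1(g * (x0 - b))
stable_A1 = (expm1a * (a * g + 1) + a * g - expm1b * (b * g + 1) - b * g) / g**2
print(naive_A1, stable_A1)   # naive is badly off; stable is ~1.5 == (b**2 - a**2) / 2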
Example #31
for c, dtype in zip(test.columns, test.dtypes):
    if dtype == np.float64:
        test[c] = test[c].astype(np.float32)

train_x = train.drop(['air_store_id', 'visit_date', 'visitors'], axis=1)
train_y = np.log1p(train['visitors'].values)
print(train_x.shape, train_y.shape)
test_x = test.drop(['id', 'air_store_id', 'visit_date', 'visitors'], axis=1)

# parameter tuning of xgboost
# start from default setting
boost_params = {'eval_metric': 'rmse'}
xgb0 = xgb.XGBRegressor(
    max_depth=8,
    learning_rate=0.01,
    n_estimators=10000,
    objective='reg:linear',
    gamma=0,
    min_child_weight=1,
    subsample=1,
    colsample_bytree=1,
    scale_pos_weight=1,
    seed=27,
    **boost_params)

xgb0.fit(train_x, train_y)
predict_y = xgb0.predict(test_x)
test['visitors'] = np.expm1(predict_y)
test[['id', 'visitors']].to_csv(
    'xgb0_submission.csv', index=False, float_format='%.3f')  # LB0.495
Example #32
 def _cdf(self, x, p):
     k = floor(x)
     return -expm1(log1p(-p) * k)
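The _cdf above is the geometric CDF 1 - (1 - p)**k rewritten with expm1/log1p so it stays accurate when p is tiny; a small comparison with made-up values:

import numpy as np

p, k = 1e-17, 10
print(1.0 - (1.0 - p)**k)            # 0.0 -- (1 - p) rounds to exactly 1.0
print(-np.expm1(np.log1p(-p) * k))   # ~1e-16, the correct value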
Example #33
 def _stats(self, lambda_):
     mu = 1 / (exp(lambda_) - 1)
     var = exp(-lambda_) / (expm1(-lambda_))**2
     g1 = 2 * cosh(lambda_ / 2.0)
     g2 = 4 + 2 * cosh(lambda_)
     return mu, var, g1, g2

# In[27]:


def rmsle(y, y_pred):
    # y and y_pred are expected to be log1p-transformed targets (note the expm1
    # applied to the test predictions below), so plain RMSE here acts as RMSLE
    return np.sqrt(mean_squared_error(y, y_pred))


# In[28]:


model_xgb.fit(train, y_train)
joblib.dump(model_xgb, 'xgboost_model.joblib')
xgb_train_pred = model_xgb.predict(train)
xgb_pred = np.expm1(model_xgb.predict(test))


# In[29]:


print('RMSLE score on train data:')
print(rmsle(y_train, xgb_train_pred*0.10 ))


# In[30]:


# Example
XGBoost = 1/(0.1177)
Example #35
    "bagging_fraction": 0.4,
    "bagging_freq": 1,
    "feature_fraction": 0.68,
    "lambda_l1": 10,
}

evals_result = {}
model_lgb = lgbm.train(params,
                       lgtrain,
                       5000,
                       valid_sets=[lgval],
                       early_stopping_rounds=100,
                       verbose_eval=50,
                       evals_result=evals_result)

lg_preds = pd.DataFrame(np.expm1(model_lgb.predict(x_submit)))
lg_preds.insert(0, "ID", ids.values)
lg_preds.columns = ["ID", "target"]

lg_preds.to_csv("submit.csv", index=False)

grouped = train.groupby('target')
consolidated = pd.DataFrame(columns=train.columns[1:, ])
print(len(grouped))
i = 0
for name, group in grouped:
    if i % 50 == 0:
        print("XXXX")
        print(i)
        print("XXXX")
    consolidated = consolidated.append(group.mean(), ignore_index=True)
Example #36
    def neuron_and_output_weights(self, current):
        # reduce all refractory times by dt
        self.refractory_time -= self.dt

        # compute effective dt for each neuron, based on remaining time.
        # note that refractory times that have completed midway into this
        # timestep will be given a partial timestep
        delta_t = (self.dt - self.refractory_time).clip(0, self.dt)

        # update voltage using discretized lowpass filter
        # since v(t) = v(0) + (J - v(0))*(1 - exp(-t/tau)) assuming
        # J is constant over the interval [t, t + dt)
        #print(self.voltage.shape)
        #print(current.shape)
        #print(delta_t.shape)
        #print(self.tau_rc.shape)
        self.voltage -= (current - self.voltage) * np.expm1(
            -delta_t / self.tau_rc)
        self.voltage[self.voltage < 0] = 0

        # this is only needed if we're doing learning
        self.learning_activity *= (1 - self.learning_scale)

        output = np.zeros(self.n_outputs)

        for i in range(self.n_neurons):
            # determine which neurons spiked this time step
            #  NOTE: this will be very sparse, since few neurons spike at once
            if self.voltage[i] > 1:
                # compute when during the timestep the spike happened
                log_result = np.log1p(-(self.voltage[i] - 1) /
                                      (current[i] - 1))
                t_spike = self.dt + self.tau_rc * log_result
                # use this time to set the refractory_time accurately
                self.refractory_time[i] = self.tau_ref + t_spike

                # set spiked voltages to zero, and rectify negative voltages to zero
                self.voltage[i] = 0

                # do the low-pass filter needed for learning
                self.learning_activity[i] += self.learning_scale

                # handle the output connection weights
                output += self.decoders[:, i]
                '''
                if self.obj_id == 1 and self.time <= 10 and i == 10:
                    print("time: "+str(self.time)+", log result: "+str(log_result))
                    print("time: "+str(self.time)+", t spike: "+str(t_spike*1000))
                '''
        '''
        if self.obj_id == 1:
            if self.debug_count == 0:
                for i in range(90,100):
                    print(self.voltage[i])
                #print("current: "+str(self.current))
                self.debug_count = 1    
        '''
        ''' 
        if self.obj_id == 1:
            #print(current[90])
            print(output)
        '''

        return output
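The voltage update above is the closed-form lowpass step from the comment, v_new = v + (J - v) * (1 - exp(-dt/tau)), written with expm1 since -expm1(-x) == 1 - exp(-x); a quick check with made-up numbers:

import numpy as np

v, J, dt, tau = 0.2, 1.5, 0.001, 0.02
v_closed_form = v + (J - v) * (1.0 - np.exp(-dt / tau))
v_expm1 = v - (J - v) * np.expm1(-dt / tau)
assert np.isclose(v_closed_form, v_expm1)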
Example #37
def expected2(a):
    return np.sum(np.expm1(a) + np.ceil(a + 0.5) * np.rint(a + 1.5))
Example #38
def numpy_math2(a):
    sum = 0.0
    for i in range(a.shape[0]):
        sum += np.expm1(a[i]) + np.ceil(a[i] + 0.5) * np.rint(a[i] + 1.5)
    return sum
def compound(r):
    """
    returns the result of compounding the set of returns in r
    """
    return np.expm1(np.log1p(r).sum())
def inst_to_ann(r):
    """
    Convert an instantaneous interest rate to an annual interest rate
    """
    return np.expm1(r)
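An illustrative check of compound(): summing log1p returns and applying expm1 gives the same result as multiplying up the (1 + r) factors; the returns below are made up.

import numpy as np

r = np.array([0.05, -0.02, 0.10])
print(compound(r))          # ~0.1319
print(np.prod(1 + r) - 1)   # identical up to floating-point rounding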
Example #41
# Validate 


#------------------------------------------------------------------------------------------#
# Submit
logger.info('Making submission...')

y_test = np.array(test_pred).transpose()
df_preds = pd.DataFrame(
    y_test, index=df_2017.index,
    columns=pd.date_range("2017-08-16", periods=16)
).stack().to_frame("unit_sales")
df_preds.index.set_names(["store_nbr", "item_nbr", "date"], inplace=True)

submission = df_test[["id"]].join(df_preds, how="left").fillna(0)
submission["unit_sales"] = np.clip(np.expm1(submission["unit_sales"]), 0, 1000)
submission.to_csv('../submit/T016_tmp.csv', float_format='%.4f', index=None)

####### PZ, Check overall result
print("SUM =",  submission.unit_sales.sum())
print("MEAN =",  submission.unit_sales.mean())


#------------------------------------------------------------------------------------------#
df_prev = submission

df_sub= pd.read_csv('../input/sub_zero3m.csv')

t_new = pd.merge(df_prev, df_sub,  on=['id'], how = 'left')
t_new['unit_sales'] = t_new.unit_sales_y.combine_first(t_new.unit_sales_x)
Example #42
for c, dtype in zip(test.columns, test.dtypes):
    if dtype == np.float64:
        test[c] = test[c].astype(np.float32)

train_x = train.drop(['air_store_id', 'visit_date', 'visitors'], axis=1)
train_y = np.log1p(train['visitors'].values)
test_x = test.drop(['id', 'air_store_id', 'visit_date', 'visitors'], axis=1)

print("\n [1] Przetworzono dane")
print("\n [2]: Regresja liniowa..")
reg = LinearRegression()
reg.fit(train_x, train_y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

pred = reg.predict(test_x)
test['visitors[linear]'] = np.expm1(pred)

print(" Zrobione!")

print("\n [3]: Drzewo decyzyjne..")
reg = DecisionTreeRegressor(max_depth=20)
reg.fit(train_x, train_y)

DecisionTreeRegressor(criterion='mse',
                      max_depth=20,
                      max_features=None,
                      max_leaf_nodes=None,
                      min_impurity_split=1e-07,
                      min_samples_leaf=1,
                      min_samples_split=2,
                      min_weight_fraction_leaf=0.0,
Example #43
                              metric='rmse',
                              is_training_metric=True,
                              max_bin=55,
                              bagging_fraction=0.8,
                              verbose=-1,
                              bagging_freq=5,
                              feature_fraction=0.9)
score = rmsle_cv(model_xgb)
print("Xgboost score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
score = rmsle_cv(model_lgb)
print("LGBM score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
averaged_models = AveragingModels(models=(model_xgb, model_lgb))
score = rmsle_cv(averaged_models)
print("averaged score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))
averaged_models.fit(train.values, y_train)
pred = np.expm1(averaged_models.predict(test.values))
ensemble = pred
sub = pd.DataFrame()
sub['ID'] = test_ID
sub['target'] = ensemble
sub.to_csv('submission.csv', index=False)

#Xgboost score: 1.3582 (0.0640)
#LGBM score: 1.3437 (0.0519)
#averaged score: 1.3431 (0.0586)

#Xgboost score: 1.3566 (0.0525)
#LGBM score: 1.3477 (0.0497)
#averaged score: 1.3438 (0.0516)

#Xgboost score: 1.3540 (0.0621)
Example #44
              np.sqrt(mean_squared_error(df_submission.price.values, preds)))

    del submission_keras, df_submission
    gc.collect()

submission_preds_df = pd.DataFrame(models_predictions)

if split > 0:
    print(
        'ENSEMBLE MEAN SCORE :',
        np.sqrt(mean_squared_error(sub_price,
                                   submission_preds_df.mean(axis=1))))
    print(' ')
    from sklearn.linear_model import LinearRegression
    lr = LinearRegression()
    lr.fit(submission_preds_df.values, sub_price)
    preds = lr.predict(submission_preds_df.values)
    print('ENSEMBLE LR SCORE :', np.sqrt(mean_squared_error(sub_price, preds)))
    print(lr.coef_)

if split == -1:
    mysubmission = pd.DataFrame()
    mysubmission['test_id'] = submission_idx
    preds = np.expm1(submission_preds_df.mean(axis=1))
    preds[preds < 3] = 3
    preds[preds > 1000] = 1000
    mysubmission['price'] = preds
    mysubmission.to_csv('mean.csv', index=False)

    print(mysubmission.shape)