def test_grad_objective_function_infinite_end_time_two_features():
    """Gradient at zero weights matches the closed-form expression (two features).

    With an effectively infinite end_time the gradient of the objective at
    w = 0 reduces to simple sums over the converted/unconverted rows.
    """
    rows = [[0, np.nan, 1, random()],
            [random(), np.nan, 1, random()],
            [0, random(), 1, random()],
            [0, random(), 1, random()]]
    df = pd.DataFrame(
        rows, columns=['click_time', 'conv_time', 'intercept', 'random'])
    feature_cols = ['intercept', 'random']
    y = df[['click_time', 'conv_time']].values
    X = csr_matrix(df[feature_cols].values)
    grad_objective_function = grad_objective_function_factory(
        fit_intercept=False)
    # end_time far beyond any event time makes the censoring term vanish.
    input_ = prepare_input(y, X, end_time=1000000)
    input_['Jacobian'] = np.zeros(4)
    input_['mu'] = 0
    y1 = input_['y1']
    x0, x1 = input_['x0'], input_['x1']
    expected = np.array([
        0,
        1 / 2 * x0[:, 1].sum() - 1 / 2 * x1[:, 1].sum(),
        (y1[:, 1] - y1[:, 0] - 1).sum(),
        ((y1[:, 1] - y1[:, 0] - 1) * x1[:, 1]).sum(),
    ])
    assert np.allclose(grad_objective_function(np.zeros(4), **input_),
                       expected)
def test_objective_function_zero_and_one_weights():
    """Objective at all-zero and all-one weights matches hand-derived values.

    Event times are sampled inside [0, 10*random()) so they always fall
    before the fixed end_time of 10 used for the fit.
    """
    sample_horizon = 10 * random()
    df = pd.DataFrame(
        [[0, np.nan, 1],
         [sample_horizon * random(), np.nan, 1],
         [0, sample_horizon * random(), 1],
         [0, sample_horizon * random(), 1]],
        columns=['click_time', 'conv_time', 'intercept'])
    end_time = 10
    y = df[['click_time', 'conv_time']].values
    X = csr_matrix(df[['intercept']].values)
    objective_function = objective_function_factory(fit_intercept=False)
    input_ = prepare_input(y, X, end_time=end_time)
    input_['Jacobian'] = np.zeros(2)
    y0, y1 = input_['y0'], input_['y1']

    # Closed-form objective pieces at w = 0 (sigmoid(0) = 1/2, exp(0) = 1).
    a0_zero = -np.sum(np.log(1 / 2 + 1 / 2 * np.exp(-(end_time - y0[:, 0]))))
    a1_zero = y1.shape[0] * np.log(2) + (y1[:, 1] - y1[:, 0]).sum()
    assert np.allclose(objective_function(np.zeros(2), **input_),
                       a0_zero + a1_zero)

    # Closed-form pieces at w = 1; the final +1. is the regularization term.
    a0_one = -np.sum(
        np.log(1 - sigmoid(1) +
               sigmoid(1) * np.exp(-np.exp(1) * (end_time - y0[:, 0]))))
    a1_one = -y1.shape[0] * (np.log(sigmoid(1)) + 1) + np.exp(1) * (
        y1[:, 1] - y1[:, 0]).sum()
    assert np.allclose(objective_function(np.ones(2), **input_),
                       a0_one + a1_one + 1.)
def test_grad_objective_function_infinite_end_time():
    """Gradient matches closed forms at w = 0 and w = 1 (intercept only)."""
    df = pd.DataFrame(
        [[0, np.nan, 1],
         [random(), np.nan, 1],
         [0, random(), 1],
         [0, random(), 1]],
        columns=['click_time', 'conv_time', 'intercept'])
    y = df[['click_time', 'conv_time']].values
    X = csr_matrix(df[['intercept']].values)
    grad_objective_function = grad_objective_function_factory(
        fit_intercept=False)
    # end_time far beyond any event time makes the censoring term vanish.
    input_ = prepare_input(y, X, end_time=1000000)
    input_['Jacobian'] = np.zeros(2)
    y1 = input_['y1']

    w_zero = np.zeros(2)
    expected_zero = np.array([0, (y1[:, 1] - y1[:, 0]).sum() - 2])
    assert np.allclose(grad_objective_function(w_zero, **input_),
                       expected_zero)

    w_one = np.ones(2)
    expected_one = np.array([
        df.shape[0] * sigmoid(1) - y1.shape[0],
        np.exp(1) * (y1[:, 1] - y1[:, 0]).sum() - 2,
    ]) + w_one
    assert np.allclose(grad_objective_function(w_one, **input_), expected_one)
def test_minimize():
    """ConversionEstimator.fit converges on a tiny random dataset.

    Checks that the fitted coefficient arrays exist and that L-BFGS-B
    reports one of its two normal convergence messages.
    """
    df = pd.DataFrame(
        [[0., np.nan, random()],
         [random(), np.nan, random()],
         [0., random(), random()],
         [0., random(), random()]],
        columns=['click_time', 'conv_time', 'random_feature'])
    features = ['random_feature']
    end_time = 1.
    # Fix: DataFrame.as_matrix() was deprecated in pandas 0.23 and removed
    # in 1.0 — use .values, matching the other tests in this file.
    y = df[['click_time', 'conv_time']].values
    X = csr_matrix(df[features].values)
    input_ = prepare_input(y, X, end_time=end_time)
    input_['Jacobian'] = np.array([random(), random(), random(), random()])
    clf = ConversionEstimator(end_time=end_time)
    clf.fit(X, y)
    assert isinstance(clf.coef_, np.ndarray) and isinstance(
        clf.lag_coef_, np.ndarray)
    assert clf.convergence_info['success']
    # scipy's L-BFGS-B returns its status message as bytes.
    assert clf.convergence_info['message'] in {
        b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL',
        b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
    }
def fit(self, X, y):
    """Fit the estimator.

    Builds the optimizer input from (y, X, self.end_time), runs the
    optimization, stores the result on ``self.convergence_info`` and
    extracts the fitted parameters from it.

    Returns ``self`` to allow chaining.
    """
    opt_input = prepare_input(y, X, self.end_time)
    self.convergence_info = self._get_optimization_result(**opt_input)
    self._get_optimal_params(self.convergence_info)
    return self
def test_objective_function_output_type(test_data_input):
    """The objective function evaluates to a plain Python float."""
    n_features = test_data_input['X'].shape[1]
    objective_function = objective_function_factory(fit_intercept=False)
    input_ = prepare_input(**test_data_input)
    input_['Jacobian'] = np.zeros(2 * n_features)
    weights = np.zeros(2 * n_features)
    assert isinstance(objective_function(weights, **input_), float)
def test_minimize_large(test_dummied_matrix):
    """ConversionEstimator.fit converges on the larger fixture dataset."""
    y, X = test_dummied_matrix
    # Place end_time 10% past the latest observed conversion time.
    end_time = 1.1 * y[:, 1][~np.isnan(y[:, 1])].max()
    input_ = prepare_input(y, X, end_time=end_time)
    input_['Jacobian'] = np.array(
        [random() for _ in range(2 * X.shape[1])])
    clf = ConversionEstimator(end_time=end_time)
    clf.fit(X, y)
    assert isinstance(clf.coef_, np.ndarray)
    assert isinstance(clf.lag_coef_, np.ndarray)
    assert clf.convergence_info['success']
    # scipy's L-BFGS-B returns its status message as bytes.
    assert clf.convergence_info['message'] in {
        b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL',
        b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
    }
def test_prepare_input():
    """prepare_input returns the expected container type for every key."""
    feature_cols = [
        'int_feat_1', 'int_feat_2', 'int_feat_3',
        'cat_feat_1', 'cat_feat_2', 'cat_feat_3',
    ]
    df = pd.read_csv('convpy/tests/data/test_conv_logs.csv')
    enc = OneHotEncoderCOO()
    X = enc.transform(df[feature_cols].values)
    y = df[['click_time', 'conv_time']].values
    input_ = prepare_input(y, X, 10.)
    expected_types = {
        'y0': np.ndarray,
        'y1': np.ndarray,
        'x0': csr_matrix,
        'x1': csr_matrix,
        'diagonal0': coo_matrix,
        'diagonal1': coo_matrix,
    }
    for key, expected in expected_types.items():
        assert isinstance(input_[key], expected)
def get_fitted_estimator_and_encoder():
    """Build and return a (fitted estimator, fitted encoder) pair.

    Fits a ConversionEstimator on a tiny random dataset and a
    OneHotEncoderCOO on the raw feature matrix; used as a shared fixture
    helper by other tests.
    """
    rows = [[0., np.nan, random()],
            [random(), np.nan, random()],
            [0., random(), random()],
            [0., random(), random()]]
    df = pd.DataFrame(
        rows, columns=['click_time', 'conv_time', 'random_feature'])
    feature_cols = ['random_feature']
    end_time = 1.
    y = df[['click_time', 'conv_time']].values
    X = df[feature_cols].values
    X_enc = csr_matrix(X)
    input_ = prepare_input(y, X_enc, end_time=end_time)
    input_['Jacobian'] = np.array([random(), random(), random(), random()])

    estimator = ConversionEstimator(end_time=end_time)
    estimator.fit(X_enc, y)

    encoder = OneHotEncoderCOO(features=['random_feature'])
    encoder.fit(X)
    return estimator, encoder
def test_scipy_check_grad(test_dummied_matrix):
    """Analytic gradient agrees with the numerical one for both intercept modes.

    Evaluates scipy.optimize.check_grad at 20 constant weight vectors
    ranging from all-5 down to all-(-14) and requires the discrepancy to
    be numerically zero each time.
    """
    y, X = test_dummied_matrix
    # Place end_time 10% past the latest observed conversion time.
    end_time = 1.1 * y[:, 1][~np.isnan(y[:, 1])].max()
    for fit_intercept in (True, False):
        objective_function = objective_function_factory(
            fit_intercept=fit_intercept)
        grad_objective_function = grad_objective_function_factory(
            fit_intercept=fit_intercept)
        input_ = prepare_input(y, X, end_time=end_time)
        p = input_['x0'].shape[1]
        input_['Jacobian'] = np.zeros(2 * p)
        input_['mu'] = 0.
        # Bind the fixed keyword arguments once; only w0 varies below.
        L = partial(objective_function, **input_)
        DL = partial(grad_objective_function, **input_)
        for i in range(20):
            w0 = (5. - i) * np.ones(2 * p)
            assert isclose(check_grad(L, DL, w0), 0.)