Example #1
def test_grad_objective_function_infinite_end_time_two_features():
    df = pd.DataFrame(
        [[0, np.nan, 1, random()], [random(), np.nan, 1,
                                    random()],
         [0, random(), 1, random()], [0, random(), 1, random()]],
        columns=['click_time', 'conv_time', 'intercept', 'random'])
    features = ['intercept', 'random']
    end_time = 1000000  # large enough to act as an effectively infinite horizon

    y = df[['click_time', 'conv_time']].values
    X = csr_matrix(df[features].values)

    grad_objective_function = grad_objective_function_factory(
        fit_intercept=False)

    input_ = prepare_input(y, X, end_time=end_time)
    input_['Jacobian'] = np.zeros(4)

    y1 = input_['y1']
    x0, x1 = input_['x0'], input_['x1']

    w_zero = np.zeros(4)
    input_['mu'] = 0

    assert np.allclose(
        grad_objective_function(w_zero, **input_),
        np.array([
            0, 1 / 2 * x0[:, 1].sum() - 1 / 2 * x1[:, 1].sum(),
            (y1[:, 1] - y1[:, 0] - 1).sum(),
            ((y1[:, 1] - y1[:, 0] - 1) * x1[:, 1]).sum()
        ]))
Example #2
def test_objective_function_zero_and_one_weights():
    end_time = 10 * random()  # provisional bound, used only to sample times below it
    df = pd.DataFrame(
        [[0, np.nan, 1], [end_time * random(), np.nan, 1],
         [0, end_time * random(), 1], [0, end_time * random(), 1]],
        columns=['click_time', 'conv_time', 'intercept'])
    features = ['intercept']
    end_time = 10  # final horizon; every time sampled above lies below it

    y = df[['click_time', 'conv_time']].values
    X = csr_matrix(df[features].values)

    objective_function = objective_function_factory(fit_intercept=False)

    input_ = prepare_input(y, X, end_time=end_time)
    input_['Jacobian'] = np.zeros(2)

    y0, y1 = input_['y0'], input_['y1']

    w_zero = np.zeros(2)
    w_one = np.ones(2)

    a0_zero = -np.sum(np.log(1 / 2 + 1 / 2 * np.exp(-(end_time - y0[:, 0]))))
    a1_zero = y1.shape[0] * np.log(2) + (y1[:, 1] - y1[:, 0]).sum()
    a0_one = -np.sum(
        np.log(1 - sigmoid(1) + sigmoid(1) * np.exp(-np.exp(1) *
                                                    (end_time - y0[:, 0]))))
    a1_one = -y1.shape[0] * (np.log(sigmoid(1)) +
                             1) + np.exp(1) * (y1[:, 1] - y1[:, 0]).sum()

    assert np.allclose(objective_function(w_zero, **input_), a0_zero + a1_zero)
    assert np.allclose(objective_function(w_one, **input_),
                       a0_one + a1_one + 1.)
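The closed forms above match the standard delayed-feedback likelihood, assuming (inferred from these tests, not stated here) that the model is P(convert) = sigmoid(w_c . x) with an exponential conversion delay of rate exp(w_d . x). A minimal per-sample sketch under that assumption:

import numpy as np
from scipy.special import expit as sigmoid

def nll_censored(w_c, w_d, click_time, end_time):
    # click without an observed conversion by end_time:
    # likelihood = (1 - p) + p * exp(-rate * (end_time - click_time))
    p, rate = sigmoid(w_c), np.exp(w_d)
    return -np.log(1 - p + p * np.exp(-rate * (end_time - click_time)))

def nll_converted(w_c, w_d, click_time, conv_time):
    # conversion observed at conv_time:
    # likelihood = p * rate * exp(-rate * (conv_time - click_time))
    p, rate = sigmoid(w_c), np.exp(w_d)
    return -np.log(p * rate * np.exp(-rate * (conv_time - click_time)))

At w = 0 this gives p = 1/2 and rate 1, reproducing a0_zero and a1_zero; at w = 1 it gives p = sigmoid(1) and rate e, reproducing a0_one and a1_one. The extra 1. in the last assertion is consistent with an L2 penalty evaluated at w_one (see the note after Example #3).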
Example #3
def test_grad_objective_function_infinite_end_time():
    df = pd.DataFrame([[0, np.nan, 1], [random(), np.nan, 1],
                       [0, random(), 1], [0, random(), 1]],
                      columns=['click_time', 'conv_time', 'intercept'])

    end_time = 1000000  # large enough to act as an effectively infinite horizon

    y = df[['click_time', 'conv_time']].values
    X = csr_matrix(df[['intercept']].values)

    grad_objective_function = grad_objective_function_factory(
        fit_intercept=False)

    input_ = prepare_input(y, X, end_time=end_time)
    input_['Jacobian'] = np.zeros(2)

    y1 = input_['y1']

    w_zero = np.zeros(2)
    w_one = np.ones(2)

    assert np.allclose(grad_objective_function(w_zero, **input_),
                       np.array([0, (y1[:, 1] - y1[:, 0]).sum() - 2]))
    assert np.allclose(
        grad_objective_function(w_one, **input_),
        np.array([
            df.shape[0] * sigmoid(1) - y1.shape[0],
            np.exp(1) * (y1[:, 1] - y1[:, 0]).sum() - 2
        ]) + w_one)
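The trailing + w_one in the expected gradient at w_one is consistent with an L2 penalty of the form (mu / 2) * ||w||^2, whose gradient is mu * w with mu apparently defaulting to 1. This is an inference from the tests: Example #1 sets input_['mu'] = 0 explicitly to switch the penalty off.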
Example #4
def test_minimize():
    df = pd.DataFrame(
        [[0., np.nan, random()], [random(), np.nan, random()],
         [0., random(), random()], [0., random(), random()]],
        columns=['click_time', 'conv_time', 'random_feature'])

    features = ['random_feature']
    end_time = 1.

    y = df[['click_time', 'conv_time']].values  # .as_matrix() was removed in pandas 1.0
    X = csr_matrix(df[features].values)

    input_ = prepare_input(y, X, end_time=end_time)
    input_['Jacobian'] = np.array([random(), random(), random(), random()])

    clf = ConversionEstimator(end_time=end_time)
    clf.fit(X, y)

    assert isinstance(clf.coef_, np.ndarray) and isinstance(
        clf.lag_coef_, np.ndarray)
    assert clf.convergence_info['success']
    assert clf.convergence_info['message'] in {
        b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL',
        b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
    }
Example #5
    def fit(self, X, y):

        utils_input = (y, X, self.end_time)
        opt_input = prepare_input(*utils_input)

        self.convergence_info = self._get_optimization_result(**opt_input)
        self._get_optimal_params(self.convergence_info)

        return self
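The byte-string convergence messages asserted in Examples #4 and #7 are the status strings SciPy's L-BFGS-B solver reports, which suggests _get_optimization_result wraps scipy.optimize.minimize. A hypothetical sketch under that assumption; the argument names and any returned keys beyond 'success' and 'message' are guesses, not the library's actual code:

import numpy as np
from scipy.optimize import minimize

def _get_optimization_result_sketch(objective, gradient, n_params, **opt_input):
    # minimize the (penalized) negative log-likelihood with L-BFGS-B;
    # the zero starting point is chosen for the sketch only
    res = minimize(lambda w: objective(w, **opt_input),
                   np.zeros(n_params),
                   jac=lambda w: gradient(w, **opt_input),
                   method='L-BFGS-B')
    # older SciPy versions report the L-BFGS-B message as bytes,
    # e.g. b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
    return {'success': res.success, 'message': res.message, 'x': res.x}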
Example #6
def test_objective_function_output_type(test_data_input):
    p = test_data_input['X'].shape[1]

    objective_function = objective_function_factory(fit_intercept=False)

    input_ = prepare_input(**test_data_input)

    w = np.zeros(2 * p)
    input_['Jacobian'] = np.zeros(2 * p)

    assert isinstance(objective_function(w, **input_), float)
Example #7
def test_minimize_large(test_dummied_matrix):

    y, X = test_dummied_matrix

    end_time = 1.1 * y[:, 1][~np.isnan(y[:, 1])].max()

    input_ = prepare_input(y, X, end_time=end_time)
    input_['Jacobian'] = np.array([random() for _ in range(2 * X.shape[1])])

    clf = ConversionEstimator(end_time=end_time)
    clf.fit(X, y)

    assert isinstance(clf.coef_, np.ndarray) and isinstance(
        clf.lag_coef_, np.ndarray)
    assert clf.convergence_info['success']
    assert clf.convergence_info['message'] in {
        b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL',
        b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
    }
Example #8
def test_prepare_input():
    features = [
        'int_feat_1', 'int_feat_2', 'int_feat_3', 'cat_feat_1', 'cat_feat_2',
        'cat_feat_3'
    ]

    df = pd.read_csv('convpy/tests/data/test_conv_logs.csv')

    enc = OneHotEncoderCOO()
    X = enc.transform(df[features].values)
    y = df[['click_time', 'conv_time']].values
    end_time = 10.

    input_ = prepare_input(y, X, end_time)

    assert isinstance(input_['y0'], np.ndarray)
    assert isinstance(input_['y1'], np.ndarray)
    assert isinstance(input_['x0'], csr_matrix)
    assert isinstance(input_['x1'], csr_matrix)
    assert isinstance(input_['diagonal0'], coo_matrix)
    assert isinstance(input_['diagonal1'], coo_matrix)
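The asserted keys suggest prepare_input partitions the samples by whether a conversion was observed (conv_time is NaN for censored clicks). A minimal sketch of that split, assuming this reading; only the six keys the test checks are emitted, and the diagonal matrices are typed placeholders rather than the real contents:

import numpy as np
from scipy.sparse import csr_matrix, coo_matrix

def prepare_input_sketch(y, X, end_time):
    X = csr_matrix(X)                     # ensure row indexing is available
    converted = ~np.isnan(y[:, 1])        # conv_time is NaN for censored rows
    y0, y1 = y[~converted], y[converted]  # censored vs. converted samples
    x0, x1 = X[~converted], X[converted]  # matching feature rows
    return {
        'y0': y0, 'y1': y1,
        'x0': x0, 'x1': x1,
        'diagonal0': coo_matrix((x0.shape[0], x0.shape[0])),
        'diagonal1': coo_matrix((x1.shape[0], x1.shape[0])),
    }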
Example #9
    def get_fitted_estimator_and_encoder():
        df = pd.DataFrame(
            [[0., np.nan, random()], [random(), np.nan,
                                      random()],
             [0., random(), random()], [0., random(), random()]],
            columns=['click_time', 'conv_time', 'random_feature'])

        features = ['random_feature']
        end_time = 1.
        y, X = df[['click_time', 'conv_time']].values, df[features].values
        X_enc = csr_matrix(X)
        input_ = prepare_input(y, X_enc, end_time=end_time)

        input_['Jacobian'] = np.array([random(), random(), random(), random()])

        estimator = ConversionEstimator(end_time=end_time)
        estimator.fit(X_enc, y)

        encoder = OneHotEncoderCOO(features=['random_feature'])
        encoder.fit(X)

        return estimator, encoder
Example #10
def test_scipy_check_grad(test_dummied_matrix):

    y, X = test_dummied_matrix

    end_time = 1.1 * y[:, 1][~np.isnan(y[:, 1])].max()

    for fit_intercept in [True, False]:
        objective_function = objective_function_factory(
            fit_intercept=fit_intercept)
        grad_objective_function = grad_objective_function_factory(
            fit_intercept=fit_intercept)

        input_ = prepare_input(y, X, end_time=end_time)
        p = input_['x0'].shape[1]
        input_['Jacobian'] = np.zeros(2 * p)
        input_['mu'] = 0.
        for i in range(20):
            w0 = (5. - i) * np.ones(2 * p)
            L = partial(objective_function, **input_)
            DL = partial(grad_objective_function, **input_)

            # verify the analytical gradient at each starting point,
            # not only at the last one
            assert isclose(check_grad(L, DL, w0), 0.)
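check_grad returns the 2-norm of the difference between the supplied analytical gradient and a finite-difference approximation of the objective, so a value near zero means the two agree. A self-contained toy illustration of the same technique:

import numpy as np
from scipy.optimize import check_grad

f = lambda w: float(np.sum(w ** 2))  # toy objective
df = lambda w: 2 * w                 # its exact gradient
assert np.isclose(check_grad(f, df, np.ones(3)), 0., atol=1e-6)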