Beispiel #1
0
def test_res(blob_bin, C):
    """Compare our two-class LogisticRegression against scikit-learn's.

    Both estimators are fitted on the same training split with identical
    hyperparameters; the fitted coefficients, intercept, test predictions
    and test accuracy are then required to agree.

    Parameters
    ----------
    blob_bin : tuple
        pytest fixture. See conftest.py. Unpacked here as
        ``(X_train, X_test, y_train, y_test)``.
    C : float
        Inverse regularization parameter for the LogisticRegression class.
    """
    X_train, X_test, y_train, y_test = blob_bin
    # pin the hyperparameters explicitly so a change of library defaults
    # cannot make the two models diverge; C comes from the test parameter
    shared_params = {"tol": 1e-4, "C": C, "max_iter": 100}
    # reference (scikit-learn) model first, then the implementation under test
    reference = _LogisticRegression(**shared_params).fit(X_train, y_train)
    candidate = LogisticRegression(**shared_params).fit(X_train, y_train)
    assert_close = np.testing.assert_allclose
    # the reference coef_ is flattened and its intercept_ indexed at 0 so
    # that both sides of each comparison have matching shapes
    assert_close(reference.coef_.ravel(), candidate.coef_)
    assert_close(reference.intercept_[0], candidate.intercept_)
    # predicted labels on the held-out split must agree
    assert_close(reference.predict(X_test), candidate.predict(X_test))
    # and so must the resulting accuracy
    reference_score = reference.score(X_test, y_test)
    candidate_score = candidate.score(X_test, y_test)
    assert_close(reference_score, candidate_score)
Beispiel #2
0
def bradleyterry_rank(games, shrinkage=1e-8):
    """Fit a logistic regression to ``games`` and return its coefficients.

    The design matrix and target are produced by
    ``bradleyterry_logistic_model(games)``. The regression is fitted with an
    intercept and with inverse regularization strength ``1 / shrinkage``.

    Parameters
    ----------
    games
        Input forwarded unchanged to ``bradleyterry_logistic_model``.
    shrinkage : float
        Regularization strength; its reciprocal is used as ``C``.

    Returns
    -------
    Series with one fitted coefficient per column of the design matrix,
    indexed by the design matrix's ``columns``.
    """
    design, outcome = bradleyterry_logistic_model(games)
    classifier = _LogisticRegression(C=1. / shrinkage, fit_intercept=True)
    classifier.fit(design, outcome)
    # coef_ is flattened so each coefficient lines up with one design column
    return _pd.Series(classifier.coef_.ravel(), index=design.columns)
    def __init__(self, *args):
        """Parse ``<target> [FROM|~] <field> ...`` and create the estimator.

        The first positional argument is stored as the response variable.
        If the second argument is ``'~'`` or (case-insensitively) ``'from'``
        it is treated as a separator and skipped; every remaining argument is
        stored as an explanatory variable.

        Raises
        ------
        Exception
            If fewer than two arguments are given, the second argument cannot
            be inspected as a string, or no explanatory variable remains.
        """
        syntax_error = 'Syntax error: Expected "<target> FROM <field> ...'
        try:
            self.response_variable = args[0]
            separator = args[1]
            has_separator = separator == '~' or separator.lower() == 'from'
        except (IndexError, AttributeError):
            # Too few arguments, or a second argument without ``.lower()``.
            # Only these failures are translated so that KeyboardInterrupt
            # and SystemExit are no longer swallowed by a bare ``except``.
            raise Exception(syntax_error)

        self.explanatory_variables = args[2:] if has_separator else args[1:]
        # Explicit check instead of ``assert``, which is stripped under -O.
        if len(self.explanatory_variables) == 0:
            raise Exception(syntax_error)

        # 'balanced' replaces the former class_weight='auto', which was
        # deprecated and then removed from scikit-learn.
        self.estimator = _LogisticRegression(class_weight='balanced')
        self.is_classifier = True
    def __init__(self, options):
        """Build a class-balanced logistic regression estimator from options.

        ``options`` is first passed to ``self.handle_options``. Its
        ``'params'`` entry (an empty dict when absent) is converted with
        ``convert_params``, treating ``fit_intercept`` and ``probabilities``
        as booleans. The converted parameters, minus ``probabilities``, are
        forwarded to the estimator together with ``class_weight='balanced'``.
        """
        self.handle_options(options)

        raw_params = options.get('params', {})
        boolean_names = ['fit_intercept', 'probabilities']
        estimator_kwargs = convert_params(raw_params, bools=boolean_names)

        # 'probabilities' is parsed above but is not forwarded to the
        # estimator constructor.
        if 'probabilities' in estimator_kwargs:
            del estimator_kwargs['probabilities']

        self.estimator = _LogisticRegression(
            class_weight='balanced', **estimator_kwargs)
Beispiel #5
0
    def __init__(self, **kwargs):
        """Set up the wrapped ``sklearn.linear_model.LogisticRegression``.

        ``C`` is removed from the keyword arguments (falling back to 100
        when absent) and stored on the instance; every other keyword
        argument is handed to the scikit-learn classifier unchanged.

        Keyword Arguments
        -----------------
        C: int, default = 100
        """
        super(LogisticRegression, self).__init__()
        default_c = 100
        self.C = kwargs.pop("C", default_c)
        # kwargs no longer contains "C" here, so it cannot be passed twice
        remaining_options = kwargs
        self.clf = _LogisticRegression(C=self.C, **remaining_options)
    def __init__(self, options):
        """Build a logistic regression estimator from user-supplied options.

        ``options`` is first passed to ``self.handle_options``. Its
        ``'params'`` entry (an empty dict when absent) is converted with
        ``convert_params`` and then validated:

        * ``solver`` must be ``newton-cg`` or ``lbfgs``; it defaults to
          ``newton-cg`` when not given.
        * ``multi_class``, when given, must be ``ovr`` or ``multinomial``.
        * ``class_weight``, when given, is parsed as a JSON dictionary
          (single quotes are accepted). If parsing fails an error and a
          warning are reported and ``'balanced'`` is used instead. When not
          given, ``'balanced'`` is used.
        * ``probabilities`` is parsed but not forwarded to the estimator.

        Raises
        ------
        RuntimeError
            If ``solver`` or ``multi_class`` has an unsupported value.
        """
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            bools=['fit_intercept', 'probabilities'],
            strs=['solver', 'multi_class', 'class_weight'])

        # Solver
        if 'solver' in out_params:
            if out_params['solver'] not in ['newton-cg', 'lbfgs']:
                raise RuntimeError('solver must be either: newton-cg or lbfgs')
        else:
            # default
            out_params['solver'] = 'newton-cg'

        # Multiclass. The key used to be misspelled ('mutli_class'), which
        # meant this validation was never executed.
        if 'multi_class' in out_params:
            if out_params['multi_class'] not in ['ovr', 'multinomial']:
                raise RuntimeError(
                    'multi_class must be either: ovr or multinomial')

        if 'class_weight' in out_params:
            try:
                # allow single-quoted dictionaries by normalising the quotes
                weights = out_params['class_weight'].replace('\'', '"')
                out_params['class_weight'] = json.loads(weights)
            except Exception:
                s = """Example: class_weight="{'field_one': 0.3, 'field_two': 0.7}"""
                messages.error(
                    "Unable to load class_weight dictionary. Are field names correct? %s"
                    % s)
                messages.warn("Setting class_weight to 'balanced'.")
                # Actually apply the announced fallback; previously the
                # unparsed string was left in place and passed on.
                out_params['class_weight'] = 'balanced'
        else:
            out_params['class_weight'] = 'balanced'

        if 'probabilities' in out_params:
            del out_params['probabilities']

        self.estimator = _LogisticRegression(**out_params)
Beispiel #7
0
def estimate_dMRF(
    strajs,
    lag=1,
    stride=1,
    Encoder=_OneHotEncoder(sparse=False),
    logistic_regression_kwargs={
        'fit_intercept': False,
        'penalty': 'l1',
        'C': 1.,
        'tol': 1e-4,
        'solver': 'saga'
    }):
    """
        Estimate dMRF using logistic (binary sub-systems) or softmax (multinomial sub-systems) regression.

        Arguments:
        --------------------
        strajs (list of ndarrays): state of each subsystem as a function of time.
        lag (int=1): lag-time used in auto-regression
        stride (int=1): data stride prior to model estimation. lag has to be divisible by this quantity.
        Encoder (sklearn compatible categorical pre-processor): Encoder for spin-states, usually OneHotEncoder is recommended.
        logistic_regression_kwargs (dict): dictionary of keyword arguments forwarded to
            sklearn LogisticRegression. The dictionary itself is not modified.
            The multi_class kwargs is forced to 'ovr' for binary cases and 'multinomial' for multinomial cases.

        returns:
            dMRF instance -- estimated dMRF.

        raises:
            ValueError -- if stride exceeds lag, or lag is not divisible by stride.

        NOTE(review): the default Encoder is a single instance created when the
        function is defined. It is re-fitted on every call that relies on the
        default and is also handed to the returned dMRF, so pass a fresh encoder
        when several estimated models must stay independent.
    """
    if stride > lag:
        raise ValueError(
            "Stride exceeds lag. Lag has to be larger or equal to stride.")
    strided_strajs = [t[::stride] for t in strajs]
    # The original slices used `-lag // stride`, which parses as
    # `(-lag) // stride` and therefore differs from `lag // stride` whenever
    # lag is not a multiple of stride, producing P0 and Pt with different
    # numbers of rows. Reject that case explicitly and use one step count.
    if lag % stride != 0:
        raise ValueError("Lag has to be divisible by stride.")
    lag_steps = lag // stride
    P0 = _np.vstack([t[:-lag_steps] for t in strided_strajs])
    Pt = _np.vstack([t[lag_steps:] for t in strided_strajs])
    _, nsubsys = P0.shape

    #find active sub-systems (more than one distinct state in both the
    #time-zero and the time-lagged data)
    active_subsystems_0 = _np.where(
        [len(_np.unique(P0[:, i])) > 1 for i in range(nsubsys)])[0]
    active_subsystems_t = _np.where(
        [len(_np.unique(Pt[:, i])) > 1 for i in range(nsubsys)])[0]
    active_subsystems = list(
        set(active_subsystems_0).intersection(active_subsystems_t))
    lrs = []

    #remove constant sub-systems
    P0 = P0[:, active_subsystems]
    Pt = Pt[:, active_subsystems]

    P0 = Encoder.fit_transform(P0)

    # Work on a copy: writing 'multi_class' into the argument would mutate
    # the shared default dictionary (or the caller's dictionary).
    lr_kwargs = dict(logistic_regression_kwargs)

    for i in range(Pt.shape[1]):
        targets = Pt[:, i]
        # two categories -> one-versus-rest, more than two -> multinomial
        if len(_np.unique(targets)) > 2:
            lr_kwargs['multi_class'] = 'multinomial'
        else:
            lr_kwargs['multi_class'] = 'ovr'
        logr = _LogisticRegression(**lr_kwargs).fit(P0, targets)
        lrs.append(logr)

    return dMRF(lrs, active_subsystems, lag=lag, enc=Encoder, estimated=True)