def test_res(blob_bin, C):
    """Verify our LogisticRegression matches scikit-learn on two-class data.

    Both models are fit with the same hyperparameters and are then compared
    on the fitted solution (coefficients and intercept), on test-set
    predictions and on test-set accuracy.

    Parameters
    ----------
    blob_bin : tuple
        pytest fixture. See conftest.py.
    C : float
        Inverse regularization parameter for the LogisticRegression class.
    """
    # the fixture yields the train/test split as a 4-tuple
    X_train, X_test, y_train, y_test = blob_bin

    # pin hyperparameters explicitly so that a change in library defaults
    # cannot silently break the comparison; C comes from the test parameter
    fit_kwargs = {"tol": 1e-4, "C": C, "max_iter": 100}

    # reference (scikit-learn) model first, then the model under test
    reference = _LogisticRegression(**fit_kwargs)
    reference = reference.fit(X_train, y_train)
    candidate = LogisticRegression(**fit_kwargs)
    candidate = candidate.fit(X_train, y_train)

    # scikit-learn stores coef_ with an extra leading dimension and keeps
    # intercept_ as an array, so flatten / index before comparing
    reference_coef = reference.coef_.ravel()
    reference_intercept = reference.intercept_[0]
    np.testing.assert_allclose(reference_coef, candidate.coef_)
    np.testing.assert_allclose(reference_intercept, candidate.intercept_)

    # predicted labels on the held-out data must agree
    np.testing.assert_allclose(
        reference.predict(X_test), candidate.predict(X_test)
    )

    # and so must the resulting accuracy
    np.testing.assert_allclose(
        reference.score(X_test, y_test), candidate.score(X_test, y_test)
    )
def bradleyterry_rank(games, shrinkage=1e-8):
    """Fit a logistic regression to the Bradley-Terry design and return ratings.

    Parameters
    ----------
    games
        Passed unchanged to ``bradleyterry_logistic_model``, which returns the
        design matrix and the target vector used for the fit.
    shrinkage : float
        Regularization strength; its reciprocal is used as the ``C``
        argument of the logistic regression.

    Returns
    -------
    Series
        The flattened fitted coefficients, indexed by the columns of the
        design matrix.
    """
    design, outcomes = bradleyterry_logistic_model(games)
    # C is the inverse of the requested shrinkage
    classifier = _LogisticRegression(C=1. / shrinkage, fit_intercept=True)
    classifier.fit(design, outcomes)
    ratings = classifier.coef_.ravel()
    return _pd.Series(ratings, index=design.columns)
def __init__(self, *args):
    """Parse ``<target> [~|FROM] <field> ...`` and create the estimator.

    The first argument is stored as the response variable. When the second
    argument is ``'~'`` or (case-insensitively) ``'from'`` it is treated as a
    separator and skipped; every remaining argument is an explanatory
    variable.

    Raises
    ------
    Exception
        With a syntax-error message when fewer than two arguments are given,
        when the second argument is not string-like, or when no explanatory
        variables remain after the separator.
    """
    syntax_error = 'Syntax error: Expected "<target> FROM <field> ...'

    # Only the failures the parse itself can produce are translated into the
    # syntax error; a bare ``except`` would also swallow KeyboardInterrupt
    # and SystemExit.
    try:
        self.response_variable = args[0]
        separator = args[1]
        has_separator = separator == '~' or separator.lower() == 'from'
    except (IndexError, AttributeError, TypeError):
        raise Exception(syntax_error)

    self.explanatory_variables = args[2:] if has_separator else args[1:]
    # Explicit check instead of ``assert`` so that validation is not stripped
    # when Python runs with -O.
    if len(self.explanatory_variables) == 0:
        raise Exception(syntax_error)

    # NOTE(review): class_weight='auto' was deprecated in favour of
    # 'balanced' in later scikit-learn releases -- confirm which version
    # this file targets before changing it.
    self.estimator = _LogisticRegression(class_weight='auto')
    self.is_classifier = True
def __init__(self, options):
    """Build a class-balanced logistic regression estimator from ``options``.

    ``options`` is first handed to ``self.handle_options``. Its ``'params'``
    entry (an empty dict when absent) is run through ``convert_params`` with
    ``fit_intercept`` and ``probabilities`` declared as boolean parameters,
    and the result is forwarded to the estimator constructor.
    """
    self.handle_options(options)

    raw_params = options.get('params', {})
    estimator_kwargs = convert_params(
        raw_params,
        bools=['fit_intercept', 'probabilities'],
    )

    # 'probabilities' is accepted as a parameter but is not forwarded to the
    # estimator constructor.
    estimator_kwargs.pop('probabilities', None)

    self.estimator = _LogisticRegression(
        class_weight='balanced', **estimator_kwargs
    )
def __init__(self, **kwargs):
    """Set up the Logistic Regression classifier.

    Every keyword argument other than ``C`` is forwarded untouched to the
    underlying classifier, which is
    ``sklearn.linear_model.LogisticRegression`` here.

    Keyword Arguments
    -----------------
    C: int, default = 100
        Stored on the instance as ``self.C`` and passed on to the
        underlying classifier.
    """
    super(LogisticRegression, self).__init__()
    # Take C out of kwargs so it is not passed twice below.
    if "C" in kwargs:
        self.C = kwargs.pop("C")
    else:
        self.C = 100
    self.clf = _LogisticRegression(C=self.C, **kwargs)
def __init__(self, options):
    """Validate user parameters and build the logistic regression estimator.

    ``options`` is first handed to ``self.handle_options``. Its ``'params'``
    entry (an empty dict when absent) is converted with ``convert_params``
    and then validated:

    * ``solver`` must be ``newton-cg`` or ``lbfgs``; defaults to
      ``newton-cg`` when not given.
    * ``multi_class``, when given, must be ``ovr`` or ``multinomial``.
    * ``class_weight``, when given, is parsed as a JSON dictionary (single
      quotes are accepted); when missing or unparsable it falls back to
      ``'balanced'``.
    * ``probabilities`` is accepted but not forwarded to the estimator.

    Raises
    ------
    RuntimeError
        If ``solver`` or ``multi_class`` has an unsupported value.
    """
    self.handle_options(options)

    out_params = convert_params(
        options.get('params', {}),
        bools=['fit_intercept', 'probabilities'],
        strs=['solver', 'multi_class', 'class_weight'])

    # Solver
    if 'solver' in out_params:
        if out_params['solver'] not in ['newton-cg', 'lbfgs']:
            raise RuntimeError('solver must be either: newton-cg or lbfgs')
    else:
        # default
        out_params['solver'] = 'newton-cg'

    # Multiclass (the key was previously misspelled, so this check never ran)
    if 'multi_class' in out_params:
        if out_params['multi_class'] not in ['ovr', 'multinomial']:
            raise RuntimeError(
                'multi_class must be either: ovr or multinomial')

    if 'class_weight' in out_params:
        try:
            # JSON requires double quotes; accept single-quoted input too.
            weights = out_params['class_weight'].replace('\'', '"')
            out_params['class_weight'] = json.loads(weights)
        except Exception:
            s = """Example: class_weight="{'field_one': 0.3, 'field_two': 0.7}"""
            messages.error(
                "Unable to load class_weight dictionary. Are field names correct? %s" % s)
            messages.warn("Setting class_weight to 'balanced'.")
            # Actually apply the fallback announced above; otherwise the raw,
            # unparsable string would be passed on to the estimator.
            out_params['class_weight'] = 'balanced'
    else:
        out_params['class_weight'] = 'balanced'

    # 'probabilities' is not forwarded to the estimator constructor.
    if 'probabilities' in out_params:
        del out_params['probabilities']

    self.estimator = _LogisticRegression(**out_params)
def estimate_dMRF(
        strajs,
        lag=1,
        stride=1,
        Encoder=None,
        logistic_regression_kwargs=None):
    """
        Estimate dMRF using logistic (binary sub-systems) or softmax
        (multinomial sub-systems) regression.

        Arguments:
        --------------------
        strajs (list of ndarrays): state of each subsystem as a function of
            time.
        lag (int=1): lag-time used in auto-regression.
        stride (int=1): data stride prior to model estimation. Must be a
            positive integer that divides lag.
        Encoder (sklearn compatible categorical pre-processor): Encoder for
            spin-states, usually OneHotEncoder is recommended. When None
            (default), a fresh ``_OneHotEncoder(sparse=False)`` is created
            for this call.
        logistic_regression_kwargs (dict): dictionary of keyword arguments
            forwarded to sklearn LogisticRegression. When None (default),
            ``fit_intercept=False, penalty='l1', C=1., tol=1e-4,
            solver='saga'`` is used. The multi_class kwarg is forced to
            'ovr' for binary cases and 'multinomial' for multinomial cases.
            The passed dictionary is not modified.

        returns:
            dMRF instance -- estimated dMRF.

        raises:
            ValueError -- if stride is smaller than one, exceeds lag, or
            does not divide lag.
    """
    if stride < 1:
        raise ValueError("Stride has to be a positive integer.")
    if stride > lag:
        raise ValueError(
            "Stride exceeds lag. Lag has to be larger or equal to stride.")
    # ``-lag // stride`` floors towards minus infinity, so a lag that is not a
    # multiple of the stride would give P0 and Pt different numbers of rows.
    if lag % stride != 0:
        raise ValueError("Lag has to be divisible by stride.")

    # Create the encoder per call: a default instance in the signature would
    # be re-fitted by every call and shared with previously returned models.
    # NOTE(review): newer scikit-learn releases renamed ``sparse`` to
    # ``sparse_output`` -- confirm against the pinned version.
    if Encoder is None:
        Encoder = _OneHotEncoder(sparse=False)

    # Work on a private copy: 'multi_class' is overwritten below and must not
    # leak into the caller's dictionary or into later calls.
    if logistic_regression_kwargs is None:
        lr_kwargs = {
            'fit_intercept': False,
            'penalty': 'l1',
            'C': 1.,
            'tol': 1e-4,
            'solver': 'saga'
        }
    else:
        lr_kwargs = dict(logistic_regression_kwargs)

    # lag expressed in strided frames
    step = lag // stride

    strided_strajs = [t[::stride] for t in strajs]

    # P0 holds the frames at time t, Pt the frames one lag later
    P0 = _np.vstack([t[:-step] for t in strided_strajs])
    Pt = _np.vstack([t[step:] for t in strided_strajs])

    _, nsubsys = P0.shape

    # find active sub-systems: those taking more than one value in both the
    # instantaneous and the time-lagged data
    active_subsystems_0 = _np.where(
        [len(_np.unique(P0[:, i])) > 1 for i in range(nsubsys)])[0]
    active_subsystems_t = _np.where(
        [len(_np.unique(Pt[:, i])) > 1 for i in range(nsubsys)])[0]
    active_subsystems = list(
        set(active_subsystems_0).intersection(active_subsystems_t))

    lrs = []

    # remove constant sub-systems
    P0 = P0[:, active_subsystems]
    Pt = Pt[:, active_subsystems]

    P0 = Encoder.fit_transform(P0)

    # one regression per active sub-system, predicting its lagged state from
    # the encoded instantaneous state of all active sub-systems
    for i in range(Pt.shape[1]):
        n_states = len(_np.unique(Pt[:, i]))
        # two categories: one-versus-rest; more than two: multinomial
        lr_kwargs['multi_class'] = 'multinomial' if n_states > 2 else 'ovr'
        logr = _LogisticRegression(**lr_kwargs).fit(P0, Pt[:, i])
        lrs.append(logr)

    return dMRF(lrs, active_subsystems, lag=lag, enc=Encoder, estimated=True)