Example #1
        def __init__(self, w, fair_feature, X_train, y_train):
            self.w = w
            self.fair_feature = fair_feature

            clf0 = LogisticRegression(random_state=0).fit(
                X_train.loc[:, X_train.columns != fair_feature], y_train)
            clf0.coef_ = w[0].value[1:].T
            clf0.intercept_ = w[0].value[0][0]
            self.clf0 = clf0

            clf1 = LogisticRegression(random_state=0).fit(
                X_train.loc[:, X_train.columns != fair_feature], y_train)
            clf1.coef_ = w[1].value[1:].T
            clf1.intercept_ = w[1].value[0][0]
            self.clf1 = clf1
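Example #1 overwrites the fitted parameters of two classifiers with cvxpy solution values. Everything on this page leans on scikit-learn's shape conventions: for a binary classifier, coef_ must be (1, n_features) and intercept_ must be (1,) (a bare scalar also broadcasts). A minimal sanity check, assuming a binary clf with its parameters already set:

import numpy as np

def check_binary_lr_shapes(clf, n_features):
    # scikit-learn stores a single weight row for binary problems
    assert clf.coef_.shape == (1, n_features)
    assert np.asarray(clf.intercept_).ndim <= 1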
Example #2
    def train_clf(self, X, idxss, rs):
        N = sum(len(idx) for idx in idxss)
        n_epochs = self.compute_epochs(N)

        if self.optimization == 'fastxml':
            penalty = 'l1'
        else:
            penalty = 'l2'

        X_train, y_train = self.build_XY(X, idxss, rs)

        in_liblinear = X_train.shape[0] > (self.auto_weight * self.max_leaf_size)
        if self.engine == 'liblinear' or (self.engine == 'auto' and in_liblinear):
            if self.loss == 'log':
                # No control over penalty
                clf = LogisticRegression(solver='liblinear', random_state=rs, tol=1, 
                        C=self.C, penalty=penalty)
            else:
                clf = LinearSVC(C=self.C, fit_intercept=self.bias, 
                        max_iter=n_epochs, class_weight='balanced', 
                        penalty=penalty, random_state=rs)

        else:
            # note: SGDClassifier's n_iter was renamed max_iter in scikit-learn >= 0.19
            clf = SGDClassifier(loss=self.loss, penalty=penalty, n_iter=n_epochs, 
                    alpha=self.alpha, fit_intercept=self.bias, class_weight='balanced',
                    random_state=rs)

        clf.fit(X_train, y_train)

        # Halves the memory requirement
        clf.coef_ = sparsify(clf.coef_, self.eps)
        if self.bias:
            clf.intercept_ = clf.intercept_.astype('float32')

        return clf, CLF(clf.coef_, clf.intercept_)
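The sparsify helper used above is defined elsewhere in the project; a minimal sketch of what it might do, assuming it zeroes coefficients below eps and stores the result as float32 in sparse form (consistent with the "halves the memory requirement" comment):

import numpy as np
from scipy import sparse

def sparsify(coef, eps):
    # Zero out near-zero weights, then store as float32 CSR to cut memory.
    dense = np.array(coef, dtype='float32')  # copy so the original is untouched
    dense[np.abs(dense) < eps] = 0.0
    return sparse.csr_matrix(dense)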
Example #3
    def test_binary_predicting(self):
        """Test binary softmax classifier."""
        target_num = 2
        (W, b), (X, y) = self.make_lr_data(target_num=target_num, dtype=glue.config.floatX)

        # When target_num == 2, LogisticRegression from scikit-learn uses sigmoid,
        # so does our LogisticRegression implementation.
        lr = LogisticRegression().fit(X, y)
        lr.coef_ = W.T
        lr.intercept_ = b
        self.assertTrue(np.alltrue(lr.predict(X) == y))

        graph = G.Graph()
        with graph.as_default():
            input_var = G.make_placeholder('inputs', shape=(None, W.shape[0]), dtype=glue.config.floatX)
            input_layer = G.layers.InputLayer(input_var, shape=(None, W.shape[0]))
            lr2 = models.LogisticRegression('logistic', input_layer, target_num=target_num, W=W, b=b)
            predict_prob = G.layers.get_output(lr2)
            predict_label = G.op.argmax(predict_prob, axis=1)
            predict_fn = G.make_function(inputs=[input_var], outputs=[predict_prob, predict_label])

        with G.Session(graph):
            prob, predict = predict_fn(X)
            self.assertTrue(np.alltrue(predict == y))
            err = np.max(abs(lr.predict_proba(X) - prob))
            self.assertLess(err, 1e-5)
Example #4
def search_specificity(s, y, z, return_score=False, verbose=None):
    """Find specificity search ranking."""

    logit = LogisticRegression()

    rank_s = np.zeros(len(s), dtype=int)  # np.int was removed in NumPy 1.24
    r_s = np.zeros((len(s), len(s)))
    if verbose is not None:
        pbar = ProgressBar(widgets=['Specificity search: ', SimpleProgress()],
                           maxval=len(s)).start()
    for query_idx in range(len(s)):
        if verbose is not None:
            pbar.update(query_idx + 1)

        for ref_idx, (this_y, this_z) in enumerate(zip(y, z)):
            logit.intercept_ = np.array([this_y])
            logit.coef_ = np.array([[this_z]])
            # predict_proba expects a 2-D array, even for a single scalar value
            r_s[query_idx, ref_idx] = logit.predict_proba(
                np.array([[s[query_idx, ref_idx]]]))[0][1]

        r_s[np.isnan(r_s)] = -np.inf
        idx_s = _matlab_sort(r_s[query_idx, :])

        # make matlab equiv. by adding 1
        rank_s[query_idx] = np.where(idx_s == query_idx)[0][0] + 1

    if verbose is not None:
        pbar.finish()

    if return_score:
        return rank_s, r_s
    else:
        return rank_s
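_matlab_sort is likewise defined elsewhere. Judging from the "+ 1" comment and the NaN-to--inf replacement just before the call, it plausibly returns the indices that order a row of scores in descending order; a hypothetical stand-in:

import numpy as np

def _matlab_sort(scores):
    # Hypothetical: indices sorting scores in descending order,
    # with stable tie-breaking as in MATLAB's sort.
    return np.argsort(-np.asarray(scores), kind='stable')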
Example #5
    def test_add_loss_output_cls(self):
        from onnxcustom.utils.orttraining_helper import add_loss_output
        X, y = make_classification(  # pylint: disable=W0632
            100, n_features=10)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.int64)
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        reg = LogisticRegression()
        reg.fit(X_train, y_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg,
                      X_train,
                      target_opset=opset,
                      black_op={'LinearClassifier'},
                      options={'zipmap': False})
        onx_loss = add_loss_output(onx,
                                   'log',
                                   output_index='probabilities',
                                   eps=1e-6)
        try:
            text = onnx_simple_text_plot(onx_loss)
        except RuntimeError:
            text = ""
        if text:
            self.assertIn("Clip(probabilities", text)

        oinf = OnnxInference(onx_loss)
        output = oinf.run({'X': X_test, 'label': y_test.reshape((-1, 1))})
        loss = output['loss']
        skl_loss = log_loss(y_test, reg.predict_proba(X_test), eps=1e-6)
        self.assertLess(numpy.abs(skl_loss - loss[0, 0]), 1e-5)
Example #6
def copy_logistic_model(model: LogisticRegression,
                        max_iter=1_000_000,
                        penalty='none') -> LogisticRegression:
    copied_model = LogisticRegression(max_iter=max_iter, penalty=penalty)
    # copy all fitted attributes so the two models can be mutated independently
    copied_model.coef_ = model.coef_.copy()
    copied_model.classes_ = model.classes_.copy()
    copied_model.intercept_ = model.intercept_.copy()
    return copied_model
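A quick round-trip check of the copy, assuming a freshly fitted source model:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
src = LogisticRegression(max_iter=1000).fit(X, y)
dup = copy_logistic_model(src)
# identical parameters imply identical decisions
assert np.array_equal(src.predict(X), dup.predict(X))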
Example #7
def set_model_params(
    model: LogisticRegression, params: LogRegParams
) -> LogisticRegression:
    """Sets the parameters of a sklean LogisticRegression model."""
    model.coef_ = params[0]
    if model.fit_intercept:
        model.intercept_ = params[1]
    return model
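This is one half of the get/set parameter pair used by federated-learning clients (e.g. Flower). A sketch of the inverse under the same convention, where LogRegParams is the tuple type from the snippet above, coef_ first and intercept_ second when fit_intercept is set:

def get_model_params(model: LogisticRegression):
    # Mirror of set_model_params: extract parameters in the same order.
    if model.fit_intercept:
        return (model.coef_, model.intercept_)
    return (model.coef_,)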
Example #8
File: q53.py Project: simaki/nlp100
def get_fitted_lr():
    coef_ = np.loadtxt(project_path / "output/news/lr_coef.txt")
    intercept_ = np.loadtxt(project_path / "output/news/lr_intercept.txt")
    classes_ = np.loadtxt(project_path / "output/news/lr_classes.txt")

    lr = LogisticRegression()
    lr.coef_, lr.intercept_, lr.classes_ = coef_, intercept_, classes_

    return lr
Example #9
def deserialize_logistic_regression(model_dict):
    model = LogisticRegression(**model_dict["params"])

    model.classes_ = np.array(model_dict["classes_"])
    model.coef_ = np.array(model_dict["coef_"])
    model.intercept_ = np.array(model_dict["intercept_"])
    model.n_iter_ = np.array(model_dict["n_iter_"])

    return model
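A matching serializer sketch, assuming the same dict layout the deserializer reads (constructor params plus the four fitted arrays):

def serialize_logistic_regression(model):
    # Inverse of deserialize_logistic_regression: dump constructor params
    # and fitted attributes into a plain, JSON-friendly dict.
    return {
        "params": model.get_params(),
        "classes_": model.classes_.tolist(),
        "coef_": model.coef_.tolist(),
        "intercept_": model.intercept_.tolist(),
        "n_iter_": model.n_iter_.tolist(),
    }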
Example #11
    def choose(self, pvalues, method, outcome):
        '''
        Randomly choose the state of the node from a probability distribution
        conditioned on *pvalues*.

        This method has two parts: (1) determining the proper probability
        distribution, and (2) using that distribution to determine an outcome.

        Arguments:
            1. *pvalues* -- An array containing the assigned states of the
               node's parents. This must be in the same order as the parents
               appear in ``self.Vdataentry['parents']``.

        The function builds the conditional distribution in the manner
        described in :doc:`lgbayesiannetwork`, samples from it, and returns
        the outcome.
        '''
        warnings.filterwarnings("ignore", category=FutureWarning)
        rand = random.random()

        dispvals = []
        lgpvals = []
        for pval in pvalues:
            if (isinstance(pval, str)):
                dispvals.append(pval)
            else:
                lgpvals.append(pval)
        # find correct Gaussian
        lgdistribution = self.Vdataentry["hybcprob"][str(dispvals)]

        for pvalue in lgpvals:
            assert pvalue != 'default', "Graph skeleton was not topologically ordered."

        model = LogisticRegression(multi_class='multinomial',
                                   solver='newton-cg',
                                   max_iter=100)

        model.classes_ = np.array(lgdistribution["classes"], dtype=object)

        if len(lgdistribution["classes"]) > 1:
            model.coef_ = np.array(lgdistribution["mean_scal"],
                                   dtype=float).reshape(-1, len(lgpvals))
            model.intercept_ = np.array(lgdistribution["mean_base"],
                                        dtype=float)
            distribution = model.predict_proba(
                np.array(lgpvals).reshape(1, -1))[0]

            # choose
            rand = random.random()
            lbound = 0
            ubound = 0
            for interval in range(len(lgdistribution["classes"])):
                ubound += distribution[interval]
                if (lbound <= rand and rand < ubound):
                    rindex = interval
                    break
                else:
                    lbound = ubound

            return str(lgdistribution["classes"][rindex])

        else:
            return str(lgdistribution["classes"][0])
Example #12
def changing_the_model_coefficients(X, y):
    model = LogisticRegression()
    model.fit(X, y)
    model.coef_ = np.array([[-1, 1]])
    model.intercept_ = np.array([-3])

    util.plot_classifier(X, y, model)
    num_err = np.sum(y != model.predict(X))
    print("Number of errors:", num_err)
Example #13
    def test(self, weights, intercepts):
        X_test = pd.DataFrame(self.df, columns=self.features)
        y_test = pd.DataFrame(self.df, columns=self.label)
        y_test = y_test.values.ravel()

        lr = LogisticRegression()
        lr.fit(X_test, y_test)
        lr.coef_ = weights  # override the learned weights and intercepts
        lr.intercept_ = intercepts
        return lr.score(X_test, y_test)
Example #14
def LogR_predict():
    X = json.loads(request.form['X'])
    params = json.loads(request.form['params'])
    reg = LogisticRegression()
    reg.coef_ = np.array(params['coef'])
    reg.intercept_ = np.array(params['inter'])
    reg.n_iter_ = np.array(params['niter'])
    reg.classes_ = np.array(params['classes'])
    y = reg.predict(X)
    return jsonify(pred=y.tolist())
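A hypothetical client call for this Flask endpoint, assuming it is mounted at /LogR_predict and accepts form-encoded POSTs:

import json
import requests

resp = requests.post(
    "http://localhost:5000/LogR_predict",  # hypothetical host and route
    data={
        "X": json.dumps([[0.1, 0.2], [0.3, 0.4]]),
        "params": json.dumps({
            "coef": [[1.0, -1.0]],
            "inter": [0.0],
            "niter": [10],
            "classes": [0, 1],
        }),
    },
)
print(resp.json()["pred"])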
Example #15
def examineModel(X_train, y_train):
    model = util.openPkl("../models/avg_logistic_model")
    avg_coefficients = model['coeff_']
    avg_intercepts = model['intercept_']

    clf = LogisticRegression()
    clf.coef_ = avg_coefficients
    clf.intercept_ = avg_intercepts
    clf.classes_ = util.classes
    print("Averaged model train accuracy:", clf.score(X_train, y_train))
    avg_preds = clf.predict(X_train)
Example #16
    def resample_voting_weights(self, loc, scale):
        """
        Samples voting weights for the whole ensemble from a normal distribution.

        :type loc: numpy.ndarray
        :param loc: the mean of the normal distribution.
        :type scale: float
        :param scale: the std deviation of the normal distribution.
        :rtype: Ensemble
        :return: returns self.
        """

        self.logistic_model = []
        all_preds = self.get_predictions(self.X_train)

        for j in range(self.n_classifiers):
            for c in range(self.n_classes):
                self.voting_weights[j][c] = np.clip(np.random.normal(
                    loc=loc[j][c], scale=scale),
                                                    a_min=0.,
                                                    a_max=1.)

        for i, that_class in enumerate(self.classes):
            if self.n_classes == 2:
                logistic_regression = LogisticRegression()
                logistic_regression.fit(all_preds.T, self.y_train)
                logistic_regression.coef_ = self.voting_weights[:, i].reshape(
                    1, self.n_classifiers)
                self.logistic_model += [logistic_regression]
                break

            else:
                binary_preds = (all_preds == that_class).astype(np.int32)
                logistic_regression = LogisticRegression()
                logistic_regression.fit(binary_preds.T,
                                        self.y_train == that_class)
                logistic_regression.coef_ = self.voting_weights[:, i].reshape(
                    1, self.n_classifiers)
                self.logistic_model += [logistic_regression]

        return self
Example #17
def test(user, file):
    import soundfile as sf
    outAddr = os.path.join(WORKING_DIRECTORY + '/test', file + '.npz')
    inAddr = os.path.join(WORKING_DIRECTORY + '/files/' + user, file + '.flac')
    data, sampleRate = sf.read(inAddr)
    extract_features(sampleRate, data, outAddr, True)
    coFile = os.path.join(WORKING_DIRECTORY + '/coefficients', user + '.npy')
    coefficients = np.load(coFile)
    testSet = np.load(outAddr)
    lr = LogisticRegression()
    lr.coef_ = coefficients  # note: intercept_ and classes_ are never set here
    print(lr.predict(testSet))
Example #18
    def test_sklearn_train_lr_into_c(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.datasets import load_iris
        iris = load_iris()
        X = iris.data[:, :2]
        y = iris.target
        y[y == 2] = 1
        lr = LogisticRegression()
        lr.fit(X, y)

        # We replace the coefficients with a double too big for float32.
        lr.coef_ = numpy.array([[2.45, -3e250]])
        self.assertRaise(lambda: sklearn2graph(
            lr, output_names=['Prediction', 'Score']), Float32InfError)
Example #19
def pbubble(mlscore, hr):
    #Parameters for Logistic fit determined in Expert_votes.ipynb
    clf = LogisticRegression()
    clf.intercept_ = np.array([ 1.868260])
    clf.coef_ = np.array([[1.539796]])

    # z-scaling and combining; see Expert_votes.ipynb
    s = (hr - .2143) / .11379 + (mlscore - .10044) / .5893

    good = np.isfinite(s)

    result = np.zeros_like(mlscore) * np.nan
    result[good] = clf.predict_proba(s[good].reshape(-1, 1))[:, 1]
    return result
Example #20
def predict_x_event_new(dname, ts, filters, B_coef, B_bias):
    thres = 0.99  # might change
    lvls = [0.2, 0.8, 0.95]

    res_y_hat = []
    res_y_bin = []
    res_y_cat = []
    all_y = []
    for i in range(0, len(ts)):
        t, X, y = make_dataset_fast(dname, [ts[i]], filters)

        dum = np.array(y)
        if len(np.unique(dum)) == 1:
            print("Time series ", ts[i], " has no events")
            continue

        # load model (new)
        model = LogisticRegression(max_iter=1).fit(X, y)
        model.coef_ = B_coef
        model.intercept_ = B_bias

        y_hat = model.predict_proba(X)
        y_hat = y_hat[:, 1]

        y_bin = y_hat < thres
        y_cat = np.zeros(len(y_hat))
        y_cat[(y_hat > lvls[0]) & (y_hat < lvls[1])] = 1
        y_cat[(y_hat > lvls[1]) & (y_hat < lvls[2])] = 2
        y_cat[y_hat > lvls[2]] = 3

        res_y_hat.extend(y_hat)
        res_y_bin.extend(y_bin)
        res_y_cat.extend(y_cat)
        all_y.extend(y)

        fid = open(dname + ts[i] + 'minotaur_prediction.dat', 'w')
        for ii in range(len(y)):
            fid.write('%.4f, %.4f, %.4f, %.4f, %.4f\n' %
                      (t[ii], y[ii], y_hat[ii], y_bin[ii], y_cat[ii]))
        fid.close()

    z1, z2, _ = roc_curve(all_y, res_y_hat, pos_label=1)
    auc = roc_auc_score(all_y, res_y_hat)

    res_auc = auc
    res_fpr = z1  # false positive rate (false alarm)
    res_tpr = z2  # true positive rate (hit)

    return res_auc, res_fpr, res_tpr, res_y_hat, res_y_bin, res_y_cat
Example #21
def set_initial_params(model: LogisticRegression):
    """Sets initial parameters as zeros.

    Required since model params are uninitialized until model.fit is called,
    but the server asks clients for initial parameters at launch. Refer to
    the sklearn.linear_model.LogisticRegression documentation for more
    information.
    """
    n_classes = 10  # MNIST has 10 classes
    n_features = 784  # number of features in the dataset
    model.classes_ = np.arange(n_classes)

    model.coef_ = np.zeros((n_classes, n_features))
    if model.fit_intercept:
        model.intercept_ = np.zeros((n_classes,))
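With zeros in place, the model can already answer predict calls before any local training has happened; a small usage sketch:

import numpy as np
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
set_initial_params(model)
dummy = np.zeros((1, 784))   # one all-zero MNIST-sized input
print(model.predict(dummy))  # all-zero weights tie every class; argmax picks class 0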
Example #22
def load_model_info(model_info):
    """Return a longform model from a model info JSON object.

    Parameters
    ----------
    model_info : dict
        The JSON object containing the attributes of a model.

    Returns
    -------
    longform_model : py:class:`adeft.classify.AdeftClassifier`
        The classifier that was loaded from the given JSON object.
    """
    shortforms = model_info['shortforms']
    pos_labels = model_info['pos_labels']
    longform_model = AdeftClassifier(shortforms=shortforms,
                                     pos_labels=pos_labels)
    ngram_range = model_info['tfidf']['ngram_range']
    tfidf = TfidfVectorizer(ngram_range=ngram_range, stop_words='english')
    logit = LogisticRegression(multi_class='auto')

    tfidf.vocabulary_ = model_info['tfidf']['vocabulary_']
    tfidf.idf_ = model_info['tfidf']['idf_']
    logit.classes_ = np.array(model_info['logit']['classes_'], dtype='<U64')
    logit.intercept_ = np.array(model_info['logit']['intercept_'])
    logit.coef_ = np.array(model_info['logit']['coef_'])

    estimator = Pipeline([('tfidf', tfidf), ('logit', logit)])
    longform_model.estimator = estimator
    # These attributes do not exist in older adeft models.
    # For backwards compatibility we check if they are present
    if 'stats' in model_info:
        longform_model.stats = model_info['stats']
    if 'std' in model_info:
        longform_model._std = np.array(model_info['std'])
    if 'timestamp' in model_info:
        longform_model.timestamp = model_info['timestamp']
    if 'training_set_digest' in model_info:
        longform_model.training_set_digest = model_info['training_set_digest']
    if 'params' in model_info:
        longform_model.params = model_info['params']
    if 'version' in model_info:
        longform_model.version = model_info['version']
    if 'confusion_info' in model_info:
        longform_model.confusion_info = model_info['confusion_info']
    if 'other_metadata' in model_info:
        longform_model.other_metadata = model_info['other_metadata']
    return longform_model
Example #23
def build_model_from_factors(factors, intercept, y, X):
    # "factors" has all the values such as is_enabled, is_binary, is_balanced
    # for example:
    # print(factors[0]["alias"])
    # print(factors[0]["is_enabled"])
    # print(factors[0]["weight"])
    ##
    assert intercept is not None, "intercept is None"
    coefficient_list = preparelist(factors, X.columns, intercept)
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=.25, random_state=42)
    model = LogisticRegression()
    model.fit(X_train,
              y_train)  # It would be nice if we didn't have to do this
    model.coef_ = np.array([coefficient_list])
    return model
Example #24
def predict_x_event(dataset_t, dataset_X, dataset_y, B_coef, B_bias,
                    fname_out):
    '''
    Performs prediction for the data inside the dataset structure and
    computes the AUC performance of the predictive model.
    '''

    X = dataset_X
    y = dataset_y
    t = dataset_t

    thres = 0.99  # might change
    lvls = [0.2, 0.8, 0.95]

    # load model (new)
    model = LogisticRegression(max_iter=1).fit(X, y)
    #model.classes_ = [1, 2]
    model.coef_ = B_coef
    model.intercept_ = B_bias

    y_hat = model.predict_proba(X)
    y_hat = y_hat[:, 1]
    res_y_hat = y_hat

    z1, z2, _ = roc_curve(y, y_hat, pos_label=1)
    auc = roc_auc_score(y, y_hat)

    res_auc = auc
    res_fpr = z1  # false positive rate (false alarm)
    res_tpr = z2  # true positive rate (hit)

    res_y_bin = y_hat < thres

    y_cat = np.zeros(len(y_hat))
    y_cat[(y_hat > lvls[0]) & (y_hat < lvls[1])] = 1
    y_cat[(y_hat > lvls[1]) & (y_hat < lvls[2])] = 2
    y_cat[y_hat > lvls[2]] = 3
    res_y_cat = y_cat

    # save
    fid = open(fname_out, 'w')
    for ii in range(len(y)):
        fid.write('%.4f, %.4f, %.4f, %.4f, %.4f\n' %
                  (t[ii], y[ii], y_hat[ii], res_y_bin[ii], y_cat[ii]))
    fid.close()

    return res_auc, res_fpr, res_tpr, res_y_hat, res_y_bin, res_y_cat
Example #25
def fed_integrate_model_lr(model1, model2):

    coef_1 = model1.coef_
    coef_2 = model2.coef_

    intercept_1 = model1.intercept_
    intercept_2 = model2.intercept_

    classes = model1.classes_

    model = LogisticRegression(solver='sag')
    model.coef_ = mulkeys_add_2Darray(model1.coef_, model2.coef_) / 2
    model.intercept_ = mulkeys_add_array(
        model1.intercept_, model2.intercept_) / 2
    model.classes_ = classes

    return model
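mulkeys_add_2Darray and mulkeys_add_array come from elsewhere in the project; judging by how they are used (element-wise sums that are then halved to average two clients' parameters), minimal stand-ins might look like:

import numpy as np

def mulkeys_add_2Darray(a, b):
    # Hypothetical stand-in: element-wise sum of two 2-D coefficient arrays.
    return np.asarray(a) + np.asarray(b)

def mulkeys_add_array(a, b):
    # Hypothetical stand-in: element-wise sum of two 1-D intercept arrays.
    return np.asarray(a) + np.asarray(b)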
Example #26
def binary_logi(base_data, oneshot_data, param):
    tr = []
    tr_label = []
    for data in base_data:
        tr.append(data)
        tr_label.append(0.0)
    for data in oneshot_data:
        tr.append(data)
        tr_label.append(1.0)
    tr, tr_label = shuffle(tr, tr_label)
    clf = LogisticRegression(warm_start=True)
    print(param)
    if param is not None:
        clf.coef_ = param[:, :-1]    # all columns but the last are weights
        clf.intercept_ = param[:, -1]
    clf.fit(tr, tr_label)
    return np.c_[clf.coef_, clf.intercept_]
Example #27
def evaluate_sample(sample, central_text):
    train_columns = [
        'CANDIDATE_CONTAINS_DIGITS', 'CANDIDATE_CONTAINS_PUNCTUATION',
        'CANDIDATE_IS_STOPWORD', 'CHAR_DISTANCE',
        'CHAR_DISTANCE_/_LEN_CAND_TEXT', 'CHAR_DISTANCE_/_LEN_ORIG_TEXT',
        'LEN_CAND_PRONOUNCE', 'LEN_ORIG_PRONOUNCE',
        'MEAN_CAND_PHRASE_FREQ_IN_CLIENT_VOCAB',
        'MEAN_CAND_PHRASE_TFIDF_IN_CLIENT_VOCAB',
        'MEAN_FREQUENCY_OF_CAND_WORD_BY_WORDFREQ',
        'MEAN_FREQUENCY_OF_ORIG_WORD_BY_WORDFREQ',
        'MEAN_ORIG_PHRASE_FREQ_IN_CLIENT_VOCAB',
        'MEAN_ORIG_PHRASE_TFIDF_IN_CLIENT_VOCAB', 'ORIG_PHRASE_IS_STOPWORD',
        'PHONEME_DISTANCE', 'PHONEME_DISTANCE_/_LEN_CAND_PRONOUNCE',
        'PHONEME_DISTANCE_/_LEN_ORIG_PRONOUNCE'
    ]
    non_feature_columns = [
        'target',
        'badly_recognized_text',
        'candidate_text',
        'context',
    ]
    WEIGHTS = np.array([[
        0.00000000e+00, 1.19336941e+00, -9.37956263e-01, -9.10860368e-01,
        -4.40045761e-01, -4.56662605e-02, 2.95188685e-01, 1.39309686e-01,
        -2.00982855e-02, 8.42150367e-01, -1.83510718e-02, 1.41584803e-02,
        -2.58391711e-02, -1.73674143e-01, 3.30475285e-01, 1.14180462e-01,
        4.28573585e-04, 3.97560332e-04
    ]])
    BIAS = np.array([-1.24675658])
    X_test = sample[train_columns].to_numpy()
    clf = LogisticRegression()
    clf.coef_ = WEIGHTS
    clf.intercept_ = BIAS
    ts_preds = clf.predict_proba(X_test)[:, 1]
    df_to_display = sample[non_feature_columns].copy()
    df_to_display['score'] = ts_preds
    df_to_display = df_to_display.head(50).sort_values(by='score',
                                                       ascending=False)
    items = []
    for i in range(df_to_display.shape[0]):
        items.append((df_to_display.candidate_text.iloc[i],
                      df_to_display.score.iloc[i]))
    items = list(set(items))
    items.sort(key=lambda x: x[1], reverse=True)
    return items
Example #28
    def convert(self, model_dict):
        param_obj = model_dict["HomoLogisticRegressionParam"]
        meta_obj = model_dict["HomoLogisticRegressionMeta"]

        sk_lr_model = LogisticRegression(penalty=meta_obj.penalty.lower(),
                                         tol=meta_obj.tol,
                                         fit_intercept=meta_obj.fit_intercept,
                                         max_iter=meta_obj.max_iter)

        coefficient = np.empty((1, len(param_obj.header)))
        for index in range(len(param_obj.header)):
            coefficient[0][index] = param_obj.weight[param_obj.header[index]]
        sk_lr_model.coef_ = coefficient
        sk_lr_model.intercept_ = np.array([param_obj.intercept])
        # hard-coded 0-1 classification as HomoLR only supports this for now
        sk_lr_model.classes_ = np.array([0., 1.])
        sk_lr_model.n_iter_ = [param_obj.iters]
        return sk_lr_model
Example #29
    def runClassification(spend, marital, hh_size, income):
        # define the model
        model = LogisticRegression(max_iter=1000)
        # dummy training to initialize weights and biases of the model
        model.fit(np.array([[0, 0, 0, 0], [1, 1, 1, 1]]), [0, 1])

        # Assigning trained weights and biases to the model
        model.coef_ = np.array(
            [[0.01418876, -0.34609983, 0.14408751, 0.02292672]])  # weights
        model.intercept_ = np.array([-0.3975337])  # bias (sklearn stores it as intercept_)

        example_instance = np.array(
            [[spend, marital, hh_size,
              income]])  # [[TOTAL_SPEND, MARITAL, HH_SIZE, INCOME_RANGE]]

        # Test the model
        prediction = model.predict(example_instance)
        return prediction[0].item()
Example #30
    def get_density_score(self, file):
        preprocessor = Preprocessor(width=256, height=224, interpolation=3)
        dicom = preprocessor.load_dicom(file)
        manufacturer = DicomManager.get_manufacturer(dicom)
        normalization_config = self.GE_MD_NORMALIZER_CONFIG if manufacturer == 'GE' else self.HO_MD_NORMALIZER_CONFIG
        normalizer = CLAHENormalizer(**normalization_config)

        lr = LogisticRegression()
        coef = self.GE_DR_COEF if manufacturer == 'GE' else self.HO_DR_COEF
        intercept = self.GE_DR_INT if manufacturer == 'GE' else self.HO_DR_INT
        lr.coef_ = np.load(coef)
        lr.intercept_ = np.load(intercept)
        model = DenseRisk(preprocessor=preprocessor, normalizer=normalizer)
        model.risk_model = lr
        model.num_feature = self.DR_NUM_FEATURES
        model.scaler = self.GE_DR_SCALER if manufacturer == 'GE' else self.HO_DR_SCALER

        return model.get_density_score(file)
Example #31
def load_model(serialization_dir):
    with open(os.path.join(serialization_dir, "best_hyperparameters.json"), 'r') as f:
        hyperparameters = json.load(f)
    if hyperparameters.pop('stopwords') == 1:
        stop_words = 'english'
    else:
        stop_words = None
    weight = hyperparameters.pop('weight')
    if weight == 'binary':
        binary = True
    else:
        binary = False
    ngram_range = hyperparameters.pop('ngram_range')
    ngram_range = sorted([int(x) for x in ngram_range.split()])
    if weight == 'tf-idf':
        vect = TfidfVectorizer(stop_words=stop_words,
                               lowercase=True,
                               ngram_range=ngram_range)
    else:
        vect = CountVectorizer(binary=binary,
                               stop_words=stop_words,
                               lowercase=True,
                               ngram_range=ngram_range)
    with open(os.path.join(serialization_dir, "vocab.json"), 'r') as f:
        vocab = json.load(f)
    vect.vocabulary_ = vocab
    hyperparameters['C'] = float(hyperparameters['C'])
    hyperparameters['tol'] = float(hyperparameters['tol'])
    classifier = LogisticRegression(**hyperparameters)
    if os.path.exists(os.path.join(serialization_dir, "archive", "idf.npy")):
        vect.idf_ = np.load(
            os.path.join(serialization_dir, "archive", "idf.npy"))
    classifier.coef_ = np.load(
        os.path.join(serialization_dir, "archive", "coef.npy"))
    classifier.intercept_ = np.load(
        os.path.join(serialization_dir, "archive", "intercept.npy"))
    classifier.classes_ = np.load(
        os.path.join(serialization_dir, "archive", "classes.npy"))
    return classifier, vect
Example #32
def load_model_info(model_info):
    """Return a longform model from a model info JSON object.

    Parameters
    ----------
    model_info : dict
        The JSON object containing the attributes of a model.

    Returns
    -------
    longform_model : py:class:`adeft.classify.AdeftClassifier`
        The classifier that was loaded from the given JSON object.
    """
    shortforms = model_info['shortforms']
    pos_labels = model_info['pos_labels']
    longform_model = AdeftClassifier(shortforms=shortforms,
                                     pos_labels=pos_labels)
    ngram_range = model_info['tfidf']['ngram_range']
    tfidf = TfidfVectorizer(ngram_range=ngram_range, stop_words='english')
    logit = LogisticRegression(multi_class='auto')

    tfidf.vocabulary_ = model_info['tfidf']['vocabulary_']
    tfidf.idf_ = model_info['tfidf']['idf_']
    logit.classes_ = np.array(model_info['logit']['classes_'], dtype='<U64')
    logit.intercept_ = np.array(model_info['logit']['intercept_'])
    logit.coef_ = np.array(model_info['logit']['coef_'])

    estimator = Pipeline([('tfidf', tfidf), ('logit', logit)])
    longform_model.estimator = estimator
    # Load model statistics if they are available
    if 'stats' in model_info:
        longform_model.stats = model_info['stats']
    # Load standard deviations for calculating feature importances
    # if they are available
    if 'std' in model_info:
        longform_model._std = np.array(model_info['std'])
    return longform_model
Example #33
    def test_categorical_predicting(self):
        """Test categorical softmax classifier."""
        target_num = 5
        (W, b), (X, y) = self.make_lr_data(target_num=target_num, dtype=glue.config.floatX)

        lr = LogisticRegression(multi_class='multinomial', solver='lbfgs').fit(X, y)
        lr.coef_ = W.T
        lr.intercept_ = b
        self.assertTrue(np.alltrue(lr.predict(X) == y))

        graph = G.Graph()
        with graph.as_default():
            input_var = G.make_placeholder('inputs', shape=(None, W.shape[0]), dtype=glue.config.floatX)
            input_layer = G.layers.InputLayer(input_var, shape=(None, W.shape[0]))
            lr2 = models.LogisticRegression('logistic', input_layer, target_num=target_num, W=W, b=b)
            predict_prob = G.layers.get_output(lr2)
            predict_label = G.op.argmax(predict_prob, axis=1)
            predict_fn = G.make_function(inputs=[input_var], outputs=[predict_prob, predict_label])

        with G.Session(graph):
            prob, predict = predict_fn(X)
            self.assertTrue(np.alltrue(predict == y))
            err = np.max(abs(lr.predict_proba(X) - prob))
            self.assertLess(err, 1e-5)
Example #34
data_cls = np.asarray(cls_all)
data_pln = np.asarray(pln_all)

# Load GAT model
gat = joblib.load(data_path + "decode_time_gen/gat_cp.jl")

# Setup data for epochs and cross validation
X = np.vstack([data_cls, data_pln])
y = np.concatenate([np.zeros(len(data_cls)), np.ones(len(data_pln))])
cv = StratifiedKFold(n_splits=7, shuffle=True)

perm_score_results = []
for j, est in enumerate(gat.estimators_):
    for tmp in est:
        lr_mean = LogisticRegression(C=0.0001)
        lr_mean.coef_ = np.asarray([lr.coef_ for lr in est]).mean(
            axis=0).squeeze()
        lr_mean.intercept_ = np.asarray([lr.intercept_ for lr in est]).mean()

    score, perm_score, pval = permutation_test_score(
        lr_mean, X[:, :, j], y, cv=cv, scoring="roc_auc", n_permutations=2000)
    perm_score_results.append({
        "score": score,
        "perm_score": perm_score,
        "pval": pval
    })

joblib.dump(perm_score_results,
            data_path + "decode_time_gen/perm_score_results_cp.npy")
Example #35
def logistic_from_weights(weights, intercept):
    # Rebuild the trained model given the parameters
    logreg = LogisticRegression()
    logreg.coef_ = weights
    logreg.intercept_ = intercept
    return logreg
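Note that predict and predict_proba also require classes_ to be set, since predict indexes into it; a usage sketch that supplies it explicitly, with made-up weights:

import numpy as np

logreg = logistic_from_weights(np.array([[0.5, -0.25]]), np.array([0.1]))
logreg.classes_ = np.array([0, 1])  # predict() maps decision indices to these labels
print(logreg.predict(np.array([[1.0, 2.0]])))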