Example #1
def main():
    neg_revs = read_reviews_in_file("./rt-polaritydata/rt-polarity.neg")
    pos_revs = read_reviews_in_file("./rt-polaritydata/rt-polarity.pos")

    nb = NaiveBayes(neg_revs, pos_revs, val_split=0.2)
    nb.evaluate_naive_bayes()

    lr = LogisticRegression(neg_revs,
                            pos_revs,
                            val_split=0.2,
                            lr=0.85,
                            num_inter=1000)
    lr.evaluate_logistic_regression()

    lr = LogisticRegression(neg_revs,
                            pos_revs,
                            val_split=0.2,
                            lr=0.85,
                            num_inter=3000)
    lr.evaluate_logistic_regression()

    # Just for fun – TensorFlow
    LogisticRegression_tf(neg_revs,
                          pos_revs,
                          val_split=0.2,
                          lr=0.01,
                          num_inter=200)
Example #2
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    clf1 = linear_model.LogisticRegression()
    clf1.fit(X_train, y_train)
    y_pred = clf1.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)

    accuracy = accuracy_score(y_test, y_pred)
    print("sklearn lr Accuracy:", accuracy)

    clf2 = LogisticRegression()
    clf2.fit(X_train, y_train)
    y_pred = clf2.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)

    accuracy = accuracy_score(y_test, y_pred)
    print("Our lr Accuracy:", accuracy)
Example #3
File: p02.py  Project: mukhikaran/cs229
def p02cde(train_path, valid_path, test_path, pred_path):
    """Logistic regression with Newton's Method

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        test_path: Path to CSV file containing dataset for testing.
        pred_path: Path to save predictions.
    """
    pred_path_c = pred_path.replace(WILDCARD, "c")
    pred_path_d = pred_path.replace(WILDCARD, "d")
    pred_path_e = pred_path.replace(WILDCARD, "e")

    # Part (c)
    # Train classifier
    x_train, y_train = utils.load_dataset(train_path,
                                          label_col="t",
                                          add_intercept=True)
    model = LogisticRegression()
    model.fit(x_train, y_train)
    # Validate classifier
    x_test, y_test = utils.load_dataset(valid_path,
                                        label_col="t",
                                        add_intercept=True)
    t_pred = model.predict(x_test)
    utils.plot(x_test, y_test, model.theta, "{}.png".format(pred_path_c))
    np.savetxt(pred_path_c, t_pred)

    # Part (d)
    x_train, y_train = utils.load_dataset(train_path,
                                          label_col="y",
                                          add_intercept=True)
    model = LogisticRegression()
    model.fit(x_train, y_train)
    # Validate classifier
    x_test, y_test = utils.load_dataset(test_path,
                                        label_col="t",
                                        add_intercept=True)
    y_pred = model.predict(x_test)
    utils.plot(x_test, y_test, model.theta, "{}.png".format(pred_path_d))
    np.savetxt(pred_path_d, y_pred)

    # Part (e) find corrections
    x_val, y_val = utils.load_dataset(valid_path,
                                      label_col="y",
                                      add_intercept=True)
    x_in_V = x_val[y_val == 1]  # validation examples with observed positive label
    h = model.predict(x_in_V)
    alpha = np.mean(h)
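In the positive-only-labels setting, alpha = E[h(x) | y = 1] links the y-model to the true label t via p(t = 1 | x) ≈ h(x) / alpha. A hedged sketch of applying that correction (continuing the variables above; using pred_path_e this way is an assumption about the intended part (e) output):

    # Rescale the y-model's probabilities by alpha to estimate p(t=1 | x),
    # then save them as the part (e) predictions.
    t_pred = y_pred / alpha
    np.savetxt(pred_path_e, t_pred)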
Example #4
File: multi_lr.py  Project: kisabe/ml2019
def one_vs_all(X, y, lam):
    """
    多値分類の判別器

    Parameters
    --------------------
    X: np.array(n,d)
        データ
    y: np.array(n)
        ラベル(k種類)
    lam: int
        正則化項の係数

    Returns:
    W: np.array(d,k)
        W = (w1, w2, ..., wk)
        各w_iはi番目の要素とその他の要素を分類する判別直線の係数
    """
    labels = np.unique(y)
    X = np.insert(X, 3, 1, axis=1)
    n, d = X.shape
    w = np.empty((d,len(labels)))
    for i, main_label in enumerate(labels):
        label = np.array([1 if y_i == main_label else -1 for y_i in y])
        lr = LogisticRegression(X, label, lam)
        eta_t = lambda t: 1/(t+1)
        w[:,i], _ = lr.steepest_gradient_descent(learning_rate=eta_t, max_itr=1000)
        # bc_plot(X[:,1:3], label, w[1:4,i])
    return w
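Given the returned W, classifying a new point is just an argmax over the k one-vs-all scores. A small hedged usage sketch (the bias column is inserted at index 3 to mirror the np.insert call inside one_vs_all; the helper name is an assumption):

import numpy as np

def predict_one_vs_all(X, W, labels):
    # Append the same bias column used during training, score every
    # one-vs-all separator, and take the best-scoring label.
    Xb = np.insert(X, 3, 1, axis=1)
    scores = Xb @ W  # shape (n, k)
    return np.asarray(labels)[np.argmax(scores, axis=1)]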
Example #5
def test_loss(self):
    model = LogisticRegression(2)
    model.w = np.random.random(2) * 2 - 1
    random_data = np.random.random((3, 2)) * 2 - 1
    random_labels = np.random.randint(0, 2, 3)
    self.assertTrue(model.loss(random_data, random_labels) >= -1e-8)
    self.assertTrue(model.loss(random_data, 1 - random_labels) >= -1e-8)
Example #6
def test1():
    from sklearn.datasets import load_iris
    X, y = load_iris(return_X_y=True)
    y = np.vectorize(lambda x: 1 if x!=0 else x)(y)
    clf = LogisticRegression().fit(X, y)
    prediction = clf.predict([[5.3, 3.9, 1.2, 0.1], [1.3, 1.9, 0.2, 0.1], [11.3, 1.9, 0.2, 0.1]])
    print(prediction)
Example #7
def train(train_X,
          train_y,
          val_X=None,
          val_y=None,
          factor=1,
          bias=0,
          num_epochs=1000,
          step_size=1e-3,
          check_grad=False,
          verbose=False):
    """ This function trains a logistic regression model on the given training  data.

    Args:
    - train_X (ndarray (shape: (N, D))): A NxD matrix containing N D-dimensional training inputs.
    - train_y (ndarray (shape: (N, 1))): A N-column vector containing N scalar training outputs (labels).
    - val_X (ndarray (shape: (M, D))): A MxD matrix containing M D-dimensional validation inputs.
    - val_y (ndarray (shape: (M, 1))): A M-column vector containing M scalar validation outputs (labels).

    Initialization Args:
    - factor (float): A constant factor to scale the initial weights.
    - bias (float): The bias value

    Learning Args:
    - num_epochs (int): Number of gradient descent steps
                        NOTE: 1 <= num_epochs
    - step_size (float): Gradient descent step size
    - check_grad (bool): Whether or not to check gradient using finite difference.
    - verbose (bool): Whether or not to print gradient information for every step.
    """
    train_accuracy = 0
    # ====================================================
    # TODO: Implement your solution within the box
    # Step 1: Initialize model and initialize weights

    model = LogisticRegression(np.shape(train_X)[1], len(np.unique(train_y)))
    model.init_weights(factor, bias)

    # Step 2: Train the model
    model.learn(train_X, train_y, num_epochs, step_size, check_grad, verbose)

    # Step 3: Evaluate training performance
    train_probs = model.predict(train_X)

    # ====================================================
    train_preds = np.argmax(train_probs, axis=1)
    train_accuracy = 100 * np.mean(train_preds == train_y.flatten())
    print("Training Accuracy: {}%".format(train_accuracy))

    if val_X is not None and val_y is not None:
        validation_accuracy = 0
        # ====================================================
        # TODO: Implement your solution within the box
        # Evaluate validation performance

        val_probs = model.predict(val_X)

        # ====================================================
        val_preds = np.argmax(val_probs, axis=1)
        validation_accuracy = 100 * np.mean(val_preds == val_y.flatten())
        print("Validation Accuracy: {}%".format(validation_accuracy))
Example #8
def main():
    # Initialize CrypTen and disable OpenMP threads (needed by @mpc.run_multiprocess)
    crypten.init()
    torch.set_num_threads(1)

    lr = LogisticRegression()
    lr.train(init_w, training_samples, alpha)
Example #9
def test_fit_functional():
    import sklearn.model_selection
    import numpy as np

    from logistic_regression import LogisticRegression, accuracy
    X = np.zeros((900, 3), dtype=np.float32)
    num_samples = 30

    xx = np.linspace(-5, 5, num_samples)
    XX, YY = np.meshgrid(xx, xx)
    X[:, 0] = XX.flatten()
    X[:, 1] = YY.flatten()
    X[:, -1] = 1  # a column of 1's for the bias trick
    Z = 0.1 * XX + 0.2 * YY + 0.4
    y = (Z.reshape(-1, 1) > 0).astype(np.float32)  # binarize: cross-entropy targets must be 0/1
    X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(
        X, y)
    model = LogisticRegression(input_dimensions=2)
    train_xent, val_xent = model.fit(X_train,
                                     y_train,
                                     X_val,
                                     y_val,
                                     num_epochs=20,
                                     batch_size=4,
                                     alpha=0.1,
                                     _lambda=0.0)
    predictions = model.predict(X_val)
    assert accuracy(predictions, y_val) >= 0.65
    assert accuracy(predictions, y_val) >= 0.90
    assert accuracy(predictions, y_val) >= 0.99
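The "bias trick" in this test folds the intercept into the weight vector: with a constant 1 appended to every input, w · [x1, x2, 1] = w1*x1 + w2*x2 + b, so the boundary Z = 0 corresponds to the weight vector [0.1, 0.2, 0.4]. A minimal check of the identity (illustrative only):

import numpy as np

w = np.array([0.1, 0.2, 0.4])  # last entry acts as the bias b
x = np.array([3.0, -1.0])
x_aug = np.append(x, 1.0)      # the appended column of ones
assert np.isclose(w @ x_aug, 0.1 * 3.0 + 0.2 * (-1.0) + 0.4)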
Example #10
def iris_classification():
    print('\nIris classification using Logistic Regression\n')
    print('Initiating Data Load...')

    iris = datasets.load_iris()
    # X, y = iris.data, iris.target
    # y = one_hot_encode(y)

    X, y = iris.data[iris.target != 2], iris.target[iris.target != 2]
    y = y.reshape(y.shape[0], 1)

    size = len(X)
    indices = list(range(size))
    np.random.shuffle(indices)
    X, y = X[indices], y[indices]

    train_size = int(0.8 * len(X))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    print('Data load complete!')

    print('Constructing classifier...')
    size = (X_train.shape[-1], y_train.shape[-1])
    classifier = LogisticRegression(size)
    classifier.fit(X_train, y_train)

    print('Generating test predictions...')
    predictions = classifier.predict(X_test)

    accuracy = np.sum(
        [all(y_true == y_pred)
         for y_true, y_pred in zip(y_test, predictions)]) / len(predictions) * 100.
    print("Accuracy = {:.2f}%".format(accuracy))
Example #11
def digit_recognition():
    print('\nDigit recognition using Logistic Regression\n')
    print('Initiating Data Load...')
    digits = datasets.load_digits()
    X, y = digits.data, digits.target

    pca = PCA()
    X = pca.transform(X, num_components=23)
    y = one_hot_encode(y)

    size = len(X)
    indices = list(range(size))
    np.random.shuffle(indices)
    X, y = X[indices], y[indices]

    train_size = int(0.8 * len(X))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    print('Constructing classifier...')
    size = (X_train.shape[-1], y_train.shape[-1])
    classifier = LogisticRegression(size)
    classifier.fit(X_train, y_train)

    print('Generating test predictions...')
    predictions = classifier.predict(X_test)

    accuracy = np.sum(
        [all(y_true == y_pred)
         for y_true, y_pred in zip(y_test, predictions)]) / len(predictions) * 100.
    print("Accuracy = {:.2f}%".format(accuracy))
Example #12
def main():

    # Get training matrices for logistic regression model
    x, y = get_train_matrices()

    # Create instance of LogisticRegression with the training matrices
    logistic_regression = LogisticRegression(x, y)

    # Fit with learning rate, number of iterations and L2 regularization parameter
    logistic_regression.fit(0.01, 100000, 0)

    # Print the learned weights and biases
    print("So, the weights and biases become:\nWeights:\n {}\nBiases:\n {}"
          .format(logistic_regression.w, logistic_regression.c))

    # Validate the model by printing the performance metrics
    logistic_regression.validate()

    # Graph the curve of cost vs number of epochs
    logistic_regression.graph_cost_vs_epochs()

    # Predict for the input data in the test folder and save as test/output.csv
    x_test = pd.read_csv('test/input.csv').values[:, 1:]
    y_test = logistic_regression.predict(x_test)
    df_predict = pd.DataFrame({'y': y_test.reshape(-1)})
    df_predict.to_csv('test/output.csv')
Example #13
def test_fit_functional():
    import sklearn.model_selection
    import sklearn.datasets
    import numpy as np

    from logistic_regression import LogisticRegression, accuracy
    X = np.zeros((1000, 3), dtype=np.float32)
    X[:, -1] = 1
    features, targets = sklearn.datasets.make_blobs(n_samples=1000,
                                                    n_features=2,
                                                    centers=2,
                                                    cluster_std=1,
                                                    random_state=1234)
    X[:, [0, 1]] = features
    y = targets[:, np.newaxis]

    X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(
        X, y)
    model = LogisticRegression(input_dimensions=2)
    train_xent, val_xent = model.fit(X_train,
                                     y_train,
                                     X_val,
                                     y_val,
                                     num_epochs=20,
                                     batch_size=4,
                                     alpha=0.1,
                                     _lambda=0.0)
    predictions = model.predict(X_val)
    assert accuracy(predictions, y_val) >= 0.65
    assert accuracy(predictions, y_val) >= 0.90
    assert accuracy(predictions, y_val) >= 0.99
Example #14
def logistic_regression_convert():

  reset = myhdl.Signal(bool(0))
  clk = myhdl.Signal(bool(0))
  LEN_THETA = 3
  NB_PIPELINE_STAGES = 5
  DATAWIDTH = 32
  CHANNEL_WIDTH = 1
  INIT_DATA = 0  # (0 for myhdl.intbv)

  # --- Pipeline parameters
  pars = LogisticRegressionPars()
  pars.NB_PIPELINE_STAGES = NB_PIPELINE_STAGES
  pars.DATAWIDTH = DATAWIDTH
  pars.CHANNEL_WIDTH = CHANNEL_WIDTH
  pars.INIT_DATA = INIT_DATA
  pars.LEN_THETA = LEN_THETA
  pars.CMD_FILE = 'tb/tests/mult_pipeline.list'

  lRIO = LogisticRegressionIo()
  lRIO(pars)

  lRModule = LogisticRegression()
  lRInst = lRModule.block_connect(pars, reset, clk, lRIO.pipe_inpA, lRIO.pipe_inpB, lRIO.pipe_out_activ)

  lRInst.convert(hdl='Verilog', path='converted_hdl', name='logistic_regression')
  lRInst.convert(hdl='VHDL', path='converted_hdl', name='logistic_regression')
Example #15
def main():
    """
    Main function
    :return: None
    """

    #x_train, y_train, x_test, y_test = gaussians_dataset(2, [40, 25], [[1, 2], [10, 40]], [[10, 11], [14, 20]])
    x_train, y_train, train_names, x_test, y_test, test_names, feature_names = load_got_dataset(
        path='data/got.csv', train_split=0.8)

    logistic_regression = LogisticRegression()

    logistic_regression.fit_gradient_descent(x_train,
                                             y_train,
                                             num_epochs=10000,
                                             learning_rate=0.01,
                                             verbose=True)

    predictions = logistic_regression.predict(x_test)

    accuracy = float(np.sum(predictions == y_test)) / y_test.shape[0]
    print('Test accuracy: {}'.format(accuracy))

    # Test
    plot_boundary(x_test, test_names, logistic_regression)
Example #16
def __init__(self, random_stream, input, n_in, n_hidden, n_out=10):

    self.hidden_layer = Hidden(rng=random_stream,
                               input=input,
                               n_in=n_in,
                               n_out=n_hidden,
                               activation=T.tanh)

    self.LogisticRegressionLayer = LogisticRegression(input=self.hidden_layer.output,
                                                      n_in=n_hidden,
                                                      n_out=n_out)

    # Compute the L1 norm (sum of absolute values) and the squared L2 norm
    self.L1 = (
        abs(self.hidden_layer.W).sum() + abs(self.LogisticRegressionLayer.W).sum()
    )

    self.L2 = (
        (self.hidden_layer.W ** 2).sum() + (self.LogisticRegressionLayer.W ** 2).sum()
    )

    self.neg_loglikelihood = self.LogisticRegressionLayer.negative_log_likelihood
    self.error = self.LogisticRegressionLayer.error
    self.params = self.hidden_layer.params + self.LogisticRegressionLayer.params
    self.input = input
Example #17
def test_logistic_regression():
    X = np.random.normal(size=(100, 2))
    y = np.where(X[:, 0] > 0.5, 1, 0).reshape(-1, 1)
    lr = LogisticRegression()
    lr.fit(X, y)
    pred = 1 if lr.predict(X)[-1] > 0.5 else 0
    assert pytest.approx(pred) == y[-1]
Example #18
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Logistic Regression",
                      accuracy=accuracy,
                      legend_labels=data.target_names)
Example #19
def test_integ_fit():
    test_x = [np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]])]
    test_y = [np.array([1, 0, 1])]
    expected = [np.array([0.01328192, 0.06222676, 0.1111716])]
    lr_model = LogisticRegression()
    for idx in range(len(test_x)):
        lr_model.fit(test_x[idx], test_y[idx])
        assert pytest.approx(expected[idx], 1e-06) == lr_model.parameters
Example #20
def test_l2_regularization_gradient():
    from logistic_regression import LogisticRegression
    model = LogisticRegression(input_dimensions=2)
    model.weights = np.float32([[1, 2, 4]]).T
    gradient = model._l2_regularization_gradient()
    desired = np.float32([[1, 2, 4]]).T

    assert np.allclose(gradient, desired, rtol=1e-3, atol=1e-3) or np.allclose(gradient, 2*desired, rtol=1e-3, atol=1e-3)
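The two allclose branches exist because the penalty is written either as (1/2)*lambda*||w||^2, whose gradient is lambda*w, or as lambda*||w||^2, whose gradient is 2*lambda*w; the test accepts both conventions. A finite-difference check of the first convention with lambda = 1 (illustrative only):

import numpy as np

def penalty(w):
    # the (1/2)||w||^2 convention, whose analytic gradient is w itself
    return 0.5 * np.sum(w ** 2)

w = np.float32([1, 2, 4])
eps = 1e-3
numeric = np.array([(penalty(w + eps * e) - penalty(w - eps * e)) / (2 * eps)
                    for e in np.eye(3, dtype=np.float32)])
print(numeric)  # ~[1. 2. 4.], matching the analytic gradient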
Example #21
def test_predict():
    from logistic_regression import LogisticRegression
    model = LogisticRegression(input_dimensions=2)
    model.weights = np.float32([[1, 2, 4]]).T
    X = np.float32([[1, 2, 1], [0, 0, -2]])
    desired = np.float32([[1, 0]]).T
    actual = model.predict(X)
    np.testing.assert_allclose(actual, desired, rtol=1e-3, atol=1e-3)
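Why desired is [1, 0]: with these weights the logits are X @ w = [9, -8], the sigmoid squashes them to roughly [0.9999, 0.0003], and thresholding at 0.5 yields the hard labels. A quick check (illustrative only):

import numpy as np

X = np.float32([[1, 2, 1], [0, 0, -2]])
w = np.float32([[1, 2, 4]]).T
probs = 1 / (1 + np.exp(-(X @ w)))        # ~[[0.9999], [0.0003]]
print((probs >= 0.5).astype(np.float32))  # [[1.], [0.]], matching `desired`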
Example #22
def main():

    dirname = os.path.dirname(__file__)
    output_dirname = os.path.join(dirname, 'results')

    os.makedirs(output_dirname, exist_ok=True)

    file_name = sys.argv[1]
    dirname = os.path.dirname(__file__)
    file_name = os.path.join(dirname, file_name)

    d = DataSet(file_name)
    d.loadDataSet()

    to_remove = [
        d.data_set[0].index('Index'),
        d.data_set[0].index('First Name'),
        d.data_set[0].index('Last Name'),
        d.data_set[0].index('Birthday'),
        d.data_set[0].index('Best Hand'),
        d.data_set[0].index('Hogwarts House'),

        # Tests 7/10/18
        d.data_set[0].index('Arithmancy'),
        d.data_set[0].index('Defense Against the Dark Arts'),
        d.data_set[0].index('Divination'),
        d.data_set[0].index('Muggle Studies'),
        d.data_set[0].index('History of Magic'),
        d.data_set[0].index('Transfiguration'),
        d.data_set[0].index('Potions'),
        d.data_set[0].index('Care of Magical Creatures'),
        d.data_set[0].index('Charms'),
        d.data_set[0].index('Flying'),
    ]

    X = np.array([[
        d.data_set[i][j] for j in range(len(d.data_set[0]))
        if j not in to_remove
    ] for i in range(len(d.data_set))])
    #features = X[0,:]
    X = convert_to_float(X[1:, ])

    y_col_nb = d.data_set[0].index('Hogwarts House')
    y = np.array(d.extractColumn(y_col_nb)[1:])

    m = MeanImputation(X)
    m.train()
    m.transform()

    sc = Scaling(X)
    sc.train()
    sc.transform()

    lr = LogisticRegression(X=X, y=y)
    lr.train()
Example #23
def test_loss():
    test_x = [np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]])]
    test_y = [np.array([1, 0, 1])]
    test_beta = [np.array([1.2, 3.4, 2.5])]
    expected = [-22.599999]
    lr_model = LogisticRegression()
    for idx in range(len(test_x)):
        actual = lr_model._loss(test_x[idx], test_y[idx], test_beta[idx])
        assert pytest.approx(expected[idx], 1e-06) == actual
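The expected value is consistent with _loss returning the Bernoulli log-likelihood sum_i (y_i * eta_i - log(1 + exp(eta_i))) with eta = X @ beta: here eta ≈ [15.5, 22.6, 29.7], the two y = 1 terms nearly cancel, and the y = 0 term contributes -22.6. A quick reproduction of the number (a sketch of the presumed formula, not the project's code):

import numpy as np

X = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]], dtype=float)
y = np.array([1, 0, 1], dtype=float)
beta = np.array([1.2, 3.4, 2.5])
eta = X @ beta                                     # [15.5, 22.6, 29.7]
log_lik = np.sum(y * eta - np.log1p(np.exp(eta)))
print(log_lik)                                     # ~ -22.599999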
Example #24
    def get(self, algorithm='logistic'):
        if 'logistic' in algorithm.lower():
            return LogisticRegression(self.data, self.labels)
        elif 'hinge' in algorithm.lower():
            return HingeLoss(self.data, self.labels)
Example #25
def get_predictions_logistic_regression(train_data,
                                        train_target,
                                        test_data,
                                        q_tag=None):
    from logistic_regression import LogisticRegression
    lr = LogisticRegression(serial_filename=get_serial_filename_lr(
        q_tag=q_tag))
    lr.train(train_data, train_target)
    return lr.get_predictions(test_data)
Example #26
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        '''
        @rng
        -type : numpy.random.RandomState
        -param : a random number generator used to initialize weights

        @input
        -type : theano.tensor.TensorType
        -param : a symbolic variable that describes the input of the
        architecture

        @n_in
        -type : int
        -param : number of input units, the dimension of the space in which the
        datapoints lie

        @n_hidden
        -type : int
        -param : number of hidden units

        @n_out
        -type : int
        -param : number of output units, the dimension of the space in which
        the labels lie
        '''
        self.hiddenLayer = HiddenLayer(
                rng=rng,
                input=input,
                n_in=n_in,
                n_out=n_hidden,
                activation=T.tanh
        )
        self.logRegressionLayer = LogisticRegression(
                input=self.hiddenLayer.output,
                n_in=n_hidden,
                n_out=n_out
        )

        # Regularization options
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )
        self.L2_sqr = (
                (self.hiddenLayer.W ** 2).sum()
                + (self.logRegressionLayer.W ** 2).sum()
        )

        self.negative_log_likelihood = (
                self.logRegressionLayer.negative_log_likelihood
        )

        self.errors = self.logRegressionLayer.errors

        self.params = self.hiddenLayer.params + self.logRegressionLayer.params

        self.input = input
Example #27
def test_cross_entropy_gradient():
    from logistic_regression import LogisticRegression
    model = LogisticRegression(input_dimensions=2)
    model.weights = np.float32([[1, 2, 4]]).T
    X = np.float32([[1, 2, 1], [0, 0, 1]])
    y = np.float32([[1, 0]]).T
    gradient = model._binary_cross_entropy_gradient(X, y)
    desired = np.float32([[-6e-5, -1e-4, 0.4909]]).T
    np.testing.assert_allclose(gradient, desired, rtol=1e-3, atol=1e-3)
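The desired vector is just the gradient of the averaged binary cross-entropy, grad = X^T (sigmoid(Xw) - y) / N. Reproducing it directly (a sketch, not the project's internals):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

X = np.float32([[1, 2, 1], [0, 0, 1]])
y = np.float32([[1, 0]]).T
w = np.float32([[1, 2, 4]]).T
grad = X.T @ (sigmoid(X @ w) - y) / len(X)  # averaged BCE gradient
print(grad.ravel())  # ~[-6.2e-05, -1.2e-04, 4.9e-01], matching `desired`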
Example #28
def fitting():
    data = pd.read_csv('student_score.txt',
                       names=['Exam1', 'Exam2', 'admission'])
    x = data[['Exam1', 'Exam2']]
    y = data['admission']

    print(x.mean())
    print(x.max() - x.min())

    x = (x - x.mean()) / (x.max() - x.min())

    alpha = 10
    max_iter = 150
    model = LogisticRegression(alpha, max_iter)
    loss, _ = model.fit(x, y)

    p = model.predict(
        np.array([[
            1, (45.0 - 65.644274) / 69.769035, (85.0 - 66.221998) / 68.266173
        ]]), False)
    print('Predict %.3f when Exam1 equals 45 and Exam2 equals 85' % p)

    plt.subplot(2, 1, 1)
    plt.plot(np.arange(1, max_iter + 1), loss)
    plt.title('Loss Curve')

    plt.subplot(2, 1, 2)
    negative = data[data['admission'] == 0]
    positive = data[data['admission'] == 1]
    plt.plot(negative['Exam1'], negative['Exam2'], 'yo')
    plt.plot(positive['Exam1'], positive['Exam2'], 'k+')

    print(model.w)

    bx = data['Exam1']
    by = (-68.266173 / model.w[2]) * ((
        (bx - 65.644274) / 69.769035) * model.w[1] + model.w[0]) + 66.221998

    x = data[['Exam1', 'Exam2']]
    x = (x - x.mean()) / (x.max() - x.min())

    p = [1 if i >= 0.5 else 0 for i in model.predict(x)]
    tp = sum([1.0 for vp, vy in zip(p, y) if vp == vy and vy == 1])
    tn = sum([1.0 for vp, vy in zip(p, y) if vp == vy and vy == 0])

    fp = sum([1.0 for vp, vy in zip(p, y) if vp == 1 and vy == 0])
    fn = sum([1.0 for vp, vy in zip(p, y) if vp == 0 and vy == 1])

    print(tp, tn, fp, fn)
    print('Accuracy %.2f' % ((tp + tn) / (tp + tn + fp + fn)))
    print('Precision %.2f' % (tp / (tp + fp)))
    print('Recall %.2f' % (tp / (tp + fn)))

    plt.plot(bx, by)

    plt.show()
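The counts above implement the standard confusion-matrix definitions: accuracy = (TP + TN) / total, precision = TP / (TP + FP), recall = TP / (TP + FN). They can be cross-checked against sklearn.metrics (an aside, not part of the original script):

from sklearn.metrics import accuracy_score, precision_score, recall_score

# p and y are the thresholded predictions and true labels from the snippet above
print('Accuracy %.2f' % accuracy_score(y, p))
print('Precision %.2f' % precision_score(y, p))
print('Recall %.2f' % recall_score(y, p))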
Example #29
def train():
    fname = sys.argv[1]
    output_fname = sys.argv[2]

    X, y = get_data(data=read_train_csv(fname))
    model = LogisticRegression(iteration=30000)

    model.fit(X, y)

    model.save(output_fname)
Example #30
def init_model(model_type, delta, area_width):
    if model_type == 'LR':
        return LogisticRegression(delta, area_width)
    elif model_type == 'DT':
        return DecisionTree(delta)
    elif model_type == 'RF':
        return RandomForest(delta)
    else:
        raise SolverException('Invalid model type: ' + Fore.MAGENTA +
                              model_type + Fore.RESET)