def main():
    args = get_args()
    check_args(args)

    if args.mode.lower() == "train":
        # Load the training data.
        X, y = load_data(args.data)
        # Create the model.
        # TODO: Add other algorithms as necessary.
        if args.algorithm.lower() == 'useless':
            model = models.Useless()
        elif args.algorithm.lower() == 'sumoffeatures':
            model = models.SumOfFeatures()
        elif args.algorithm.lower() == 'perceptron':
            if args.online_training_iterations and args.online_learning_rate:
                model = models.Perceptron(args.online_learning_rate,
                                          args.online_training_iterations)
            else:
                model = models.Perceptron()
        else:
            raise Exception('The model given by --algorithm is not yet supported.')

        # Train the model.
        model.fit(X, y)
        # Save the model.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError:
            raise Exception("Exception while writing to the model file.")
        except pickle.PickleError:
            raise Exception("Exception while dumping model pickle.")

    elif args.mode.lower() == "test":
        # Load the test data.
        X, y = load_data(args.data)
        # Load the model.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError:
            raise Exception("Exception while reading the model file.")
        except pickle.PickleError:
            raise Exception("Exception while loading model pickle.")

        # Compute and save the predictions.
        y_hat = model.predict(X)
        invalid_label_mask = (y_hat != 0) & (y_hat != 1)
        if any(invalid_label_mask):
            raise Exception(
                'All predictions must be 0 or 1, but found other predictions.')
        np.savetxt(args.predictions_file, y_hat, fmt='%d')

    else:
        raise Exception("Mode given by --mode is unrecognized.")
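
These drivers assume module-level imports of numpy as np, pickle, and a local models module, plus helpers get_args, check_args, and load_data defined elsewhere in the assignment. For context, a minimal sketch of the Perceptron interface the snippet relies on: a constructor taking a learning rate and an iteration count, with fit/predict over 0/1 labels. The defaults and update rule below are illustrative assumptions, not the original models.Perceptron.

import numpy as np

class Perceptron:
    """Illustrative stand-in for models.Perceptron (assumed interface only)."""

    def __init__(self, learning_rate=1.0, iterations=10):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.w = None

    def fit(self, X, y):
        X = np.asarray(X, dtype=float)
        signs = np.where(np.asarray(y) == 1, 1.0, -1.0)  # map {0, 1} -> {-1, +1}
        self.w = np.zeros(X.shape[1])
        for _ in range(self.iterations):
            for x_i, s in zip(X, signs):
                if s * (x_i @ self.w) <= 0:               # mistake-driven update
                    self.w += self.learning_rate * s * x_i

    def predict(self, X):
        # Return labels in {0, 1}, matching the validity check in main().
        return (np.asarray(X, dtype=float) @ self.w > 0).astype(int)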
Example 2
def test():
    """
    Test Perceptron, SVM, and LDA accuracy as a function of the training-set size m.
    """
    tested_models = [
        TestedModel('Perceptron', models.Perceptron()),
        TestedModel('SVM', models.SVM()),
        TestedModel('LDA', models.LDA()),
    ]

    k = 10000
    iterations = 500
    for m in MS:
        for _ in range(iterations):
            X, y = sample_d(m)
            X_t, y_t = sample_d(k)

            for tested in tested_models:
                tested.model.fit(X, y)
                score = tested.model.score(X_t, y_t)
                tested.add_accuracy(m, score['accuracy'])

    plt.figure()
    for tested in tested_models:
        plt.plot(MS, [tested.accuracy[m] for m in MS],
                 marker='.',
                 label=tested.name)
    plt.legend()
    plt.title('Training batch size vs. accuracy')
    plt.xlabel('m')
    plt.ylabel('accuracy')
    plt.show()

    for tested in tested_models:
        print(tested.name, tested.accuracy)
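
MS (the list of training-set sizes m), sample_d (which draws labeled samples), and TestedModel are defined elsewhere. A plausible sketch of TestedModel, assumed only from how test() uses it (add_accuracy(m, value) and accuracy[m]): it accumulates per-size accuracies and exposes their mean.

from collections import defaultdict

class TestedModel:
    """Assumed helper: tracks mean accuracy per training-set size m."""

    def __init__(self, name, model):
        self.name = name
        self.model = model
        self._scores = defaultdict(list)

    def add_accuracy(self, m, value):
        self._scores[m].append(value)

    @property
    def accuracy(self):
        # Mean accuracy for each m seen so far.
        return {m: sum(v) / len(v) for m, v in self._scores.items()}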
Example 3
def train(args):
    """ Fit a model given the hyperparameters specified in args.
    """
    X, y = load_data(args.data)
    
    # build the appropriate model
    if args.algorithm == "perceptron":
        model = models.Perceptron(nfeatures=X.shape[1])
    elif args.algorithm == "logistic":
        model = models.LogisticRegression(nfeatures=X.shape[1])
    else:
        raise Exception("Algorithm argument not recognized")

    # Run the training loop
    for epoch in range(args.online_training_iterations):
        model.fit(X=X, y=y, lr=args.online_learning_rate)

    # Save the model
    with open(args.model_file, 'wb') as f:
        pickle.dump(model, f)
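
The loop above calls fit once per epoch with a learning-rate keyword, so the model is expected to perform a single training pass per call. A rough sketch of a LogisticRegression matching that interface (constructor taking nfeatures, fit(X, y, lr) doing one batch gradient step on the logistic loss with 0/1 labels); the actual class in models may differ.

import numpy as np

class LogisticRegression:
    """Illustrative stand-in for models.LogisticRegression (assumed interface)."""

    def __init__(self, nfeatures):
        self.w = np.zeros(nfeatures)

    def fit(self, X, y, lr):
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        p = 1.0 / (1.0 + np.exp(-(X @ self.w)))    # predicted probabilities
        grad = X.T @ (p - y) / X.shape[0]          # gradient of the mean log loss
        self.w -= lr * grad                        # one gradient-descent step

    def predict(self, X):
        return (np.asarray(X, dtype=float) @ self.w >= 0).astype(int)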
Example 4
def train(train_data, val_data, lex, max_epochs, patience):
    train_data = list(train_data)
    model = models.Perceptron()
    best_accuracy = 0
    best_model = model
    no_improvement_since = 0
    for t in range(max_epochs):
        random.shuffle(train_data)
        train_one_epoch(train_data, lex, model)
        # Validate and see if the model got better:
        val_model = model.copy()
        val_model.average_weights()
        val_accuracy = validate(t, val_data, lex, val_model)
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_model = val_model
            no_improvement_since = 0
        else:
            no_improvement_since += 1
            if no_improvement_since > patience:
                break
    return best_model
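
Validation here runs on a copy whose weights have been averaged (the averaged perceptron) while training continues on the raw model. A self-contained sketch of that copy()/average_weights() behaviour, assuming dense numpy weights; train_one_epoch, validate, and the feature extraction via lex are project-specific and not reproduced here.

import copy
import numpy as np

class AveragedPerceptron:
    """Sketch of the averaging behaviour assumed by train() above."""

    def __init__(self, n_features):
        self.w = np.zeros(n_features)
        self._w_sum = np.zeros(n_features)   # running sum of weight vectors
        self._updates = 0

    def update(self, x, sign, lr=1.0):
        # One mistake-driven update; accumulate for later averaging.
        self.w += lr * sign * x
        self._w_sum += self.w
        self._updates += 1

    def copy(self):
        return copy.deepcopy(self)

    def average_weights(self):
        # Replace the weights with their average over all updates so far.
        if self._updates:
            self.w = self._w_sum / self._updates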
Example 5
def plot():
    """
    Plot the true hyperplane and the hyperplanes learned by the Perceptron and SVM
    on m points drawn from the data distribution, for each m in MS.
    """
    ROWS = 2
    COLS = 3

    fig, axs = plt.subplots(ROWS, COLS, figsize=(15, 10))
    scatter = None
    for i, m in enumerate(MS):
        X, y = draw_points(m)
        x_lim = np.array([min(X[:, 0]), max(X[:, 0])])
        ax = axs[i // COLS, i % COLS]

        perceptron = models.Perceptron()
        perceptron.fit(X, y)
        svm = models.SVM()
        svm.fit(X, y)

        scatter = ax.scatter(X[:, 0], X[:, 1], c=y)
        ax.plot(x_lim, hyperplane_line(x_lim, W, BIAS), label='f')
        ax.plot(x_lim,
                hyperplane_line(x_lim, perceptron.get_w(), perceptron.get_b()),
                label='Perceptron')
        ax.plot(x_lim,
                hyperplane_line(x_lim, svm.get_w(), svm.get_b()),
                label='SVM')

        ax.set(xlabel='x', ylabel='y')
        ax.set_title(f"Data for m={m}")
        ax.legend()

    plt.legend(handles=scatter.legend_elements()[0],
               labels=('Negative', 'Positive'),
               loc=4)
    axs[-1, -1].axis('off')
    fig.tight_layout()
    plt.show()
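
draw_points, MS, and the true weights W and intercept BIAS come from the surrounding module. hyperplane_line is presumably solving w0*x + w1*y + b = 0 for y so the decision boundary can be drawn over x_lim; a sketch under that assumption:

import numpy as np

def hyperplane_line(x_lim, w, b):
    # y = -(w0 * x + b) / w1, assuming a 2-D weight vector with w[1] != 0.
    w = np.asarray(w, dtype=float).ravel()
    return -(w[0] * np.asarray(x_lim, dtype=float) + b) / w[1]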
Example 6
def main():
    args = get_args()
    check_args(args)
    
    if args.mode.lower() == "train":
        # Load the training data.
        X, y = load_data(args.data)
        
        # Create the model.
        # TODO: Add other algorithms as necessary.
        if args.algorithm.lower() == 'sumoffeatures':
            model = models.SumOfFeatures()
        elif args.algorithm.lower() == 'perceptron':
            model = models.Perceptron()
        elif args.algorithm.lower() == 'useless':
            model = models.Useless()
        elif args.algorithm.lower() == 'logisticregression':
            model = models.LogisticRegression()
        else:
            raise Exception('The model given by --algorithm is not yet supported.')

        # Select features.
        num_orig_features = X.shape[1]
        index_array = np.empty(1)
        if args.num_features_to_select > 0:
            index_array = np.sort(select_features(X, y, args.num_features_to_select))
            # Keep only the selected columns (one indexing step replaces the
            # original column-by-column hstack).
            X = X[:, index_array]
        
        # Train the model.
        if args.algorithm.lower() == 'perceptron':
            model.fit(X, y, args.online_learning_rate, args.online_training_iterations)
        elif args.algorithm.lower() == 'logisticregression':
            model.fit(X, y, args.online_learning_rate, args.gd_iterations, num_orig_features, index_array)
        else:
            model.fit(X, y)

        # Save the model.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError:
            raise Exception("Exception while writing to the model file.")        
        except pickle.PickleError:
            raise Exception("Exception while dumping model pickle.")
            
    elif args.mode.lower() == "test":
        # Load the test data.
        X, y = load_data(args.data)
        
        # Load the model.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError:
            raise Exception("Exception while reading the model file.")
        except pickle.PickleError:
            raise Exception("Exception while loading model pickle.")

        # Compute and save the predictions.
        y_hat = model.predict(X)
        invalid_label_mask = (y_hat != 0) & (y_hat != 1)
        if any(invalid_label_mask):
            raise Exception('All predictions must be 0 or 1, but found other predictions.')
        np.savetxt(args.predictions_file, y_hat, fmt='%d')
            
    else:
        raise Exception("Mode given by --mode is unrecognized.")
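
select_features is not shown; one plausible implementation consistent with the call above (return the indices of the k most informative columns) scores each feature by information gain against the 0/1 labels. The criterion, and the assumption that X is a dense array of binary features, are guesses; the original may select features differently or operate on a sparse matrix.

import numpy as np

def entropy(y):
    # Entropy (in bits) of a 0/1 label vector.
    p = float(np.mean(y))
    if p in (0.0, 1.0):
        return 0.0
    return -(p * np.log2(p) + (1 - p) * np.log2(1 - p))

def select_features(X, y, k):
    X = np.asarray(X)
    y = np.asarray(y)
    base = entropy(y)
    gains = np.zeros(X.shape[1])
    for j in range(X.shape[1]):
        on = X[:, j] > 0                      # treat the feature as on/off
        gain = base
        for mask in (on, ~on):
            if mask.any():
                gain -= mask.mean() * entropy(y[mask])
        gains[j] = gain
    # Indices of the k features with the highest information gain.
    return np.argsort(gains)[::-1][:k]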
Example 7
def main():
    args = get_args()
    check_args(args)

    if args.mode.lower() == "train":
        # Load the training data.
        X, y = load_data(args.data)

        # Create the model.
        # TODO: Add other algorithms as necessary.
        if args.algorithm.lower() == 'useless':
            model = models.Useless()
        elif args.algorithm.lower() == 'perceptron':
            model = models.Perceptron(args.online_learning_rate, args.online_training_iterations)
        elif args.algorithm.lower() == 'logistic':
            model = models.Logistic(args.online_learning_rate, args.online_training_iterations)
        elif args.algorithm.lower() == 'pegasos':
            model = models.Pegasos(args.online_learning_rate, args.online_training_iterations, args.pegasos_lambda)
        elif args.algorithm.lower() == 'nb':
            model = models.nb(args.independent_mode, args.training_iterations, args.latent_states)
        else:
            raise Exception('The model given by --algorithm is not yet supported.')

        # Train the model.
        model.fit(X, y)

        # Save the model.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError:
            raise Exception("Exception while writing to the model file.")        
        except pickle.PickleError:
            raise Exception("Exception while dumping model pickle.")
            
    elif args.mode.lower() == "test":
        # Load the test data.
        X, y = load_data(args.data)

        # Load the model.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError:
            raise Exception("Exception while reading the model file.")
        except pickle.PickleError:
            raise Exception("Exception while loading model pickle.")

        # Compute and save the predictions.
        y_hat = model.predict(X)
        # Save predictions with an integer format when the labels are integers,
        # otherwise fall back to a string format.
        if np.issubdtype(np.asarray(y).dtype, np.integer):
            np.savetxt(args.predictions_file, y_hat, fmt='%d')
        else:
            np.savetxt(args.predictions_file, y_hat, fmt='%s')

    else:
        raise Exception("Mode given by --mode is unrecognized.")
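
Of the algorithms above, Pegasos is the stochastic sub-gradient SVM solver; a rough sketch that matches the constructor arguments used (learning rate, iterations, pegasos_lambda). The canonical Pegasos step size is 1/(lambda*t); scaling it by the supplied learning rate is an assumption about how this course implementation uses that argument. models.Logistic and models.nb are project-specific and not sketched.

import numpy as np

class Pegasos:
    """Illustrative stand-in for models.Pegasos (assumed interface only)."""

    def __init__(self, learning_rate, iterations, pegasos_lambda):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.lam = pegasos_lambda
        self.w = None

    def fit(self, X, y):
        X = np.asarray(X, dtype=float)
        signs = np.where(np.asarray(y) == 1, 1.0, -1.0)  # map {0, 1} -> {-1, +1}
        self.w = np.zeros(X.shape[1])
        t = 0
        for _ in range(self.iterations):
            for x_i, s in zip(X, signs):
                t += 1
                eta = self.learning_rate / (self.lam * t)
                margin = s * (x_i @ self.w)
                self.w *= (1.0 - eta * self.lam)          # regularization shrink
                if margin < 1:                            # hinge-loss sub-gradient
                    self.w += eta * s * x_i

    def predict(self, X):
        return (np.asarray(X, dtype=float) @ self.w >= 0).astype(int)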
Example 8
def main():
    args = get_args()
    check_args(args)

    if args.mode.lower() == "train":
        # Load the training data.
        X, y = load_data(args.data)

        # Create the model.
        # TODO: Add other algorithms as necessary.
        if args.algorithm.lower() == 'adaboost':
            model = models.Adaboost(args.num_boosting_iterations)
            model.fit(X, y)
        elif args.algorithm.lower() == 'logisticregression':
            model = models.LogisticRegression(args.online_learning_rate,
                                              args.num_features_to_select,
                                              args.gd_iterations)
            model.fit(X, y)
        elif args.algorithm.lower() == 'sumoffeatures':
            model = models.SumOfFeatures()
            model.fit(X, y)
        elif args.algorithm.lower() == 'perceptron':
            model = models.Perceptron(args.online_learning_rate,
                                      args.online_training_iterations)
            model.fit(X, y)
        elif args.algorithm.lower() == 'lambda_means':
            model = models.LambdaMeans()
            model.fit(X,
                      y,
                      lambda0=args.cluster_lambda,
                      iterations=args.clustering_training_iterations)
        elif args.algorithm.lower() == 'stochastic_k_means':
            model = models.StochasticKMeans()
            model.fit(X,
                      y,
                      num_clusters=args.number_of_clusters,
                      iterations=args.clustering_training_iterations)
        elif args.algorithm.lower() == 'useless':
            model = models.Useless()
            model.fit(X, y)
        else:
            raise Exception('The model given by --algorithm is not yet supported.')

        # Save the model.
        try:
            with open(args.model_file, 'wb') as f:
                pickle.dump(model, f)
        except IOError:
            raise Exception("Exception while writing to the model file.")
        except pickle.PickleError:
            raise Exception("Exception while dumping model pickle.")

    elif args.mode.lower() == "test":
        # Load the test data.
        X, y = load_data(args.data)

        # Load the model.
        try:
            with open(args.model_file, 'rb') as f:
                model = pickle.load(f)
        except IOError:
            raise Exception("Exception while reading the model file.")
        except pickle.PickleError:
            raise Exception("Exception while loading model pickle.")

        # Compute and save the predictions.
        y_hat = model.predict(X)
        invalid_label_mask = (y_hat != 0) & (y_hat != 1)
        if any(invalid_label_mask):
            raise Exception(
                'All predictions must be 0 or 1, but found other predictions.')
        np.savetxt(args.predictions_file, y_hat, fmt='%d')

    else:
        raise Exception("Mode given by --mode is unrecognized.")
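
None of the excerpts include get_args(); a minimal argparse sketch that would yield the attributes used across these drivers (mode, data, algorithm, model_file, predictions_file, and the online-training hyperparameters). Flag spellings, defaults, and the omission of the algorithm-specific options are assumptions.

import argparse

def get_args():
    p = argparse.ArgumentParser(description="Train or test a simple classifier.")
    p.add_argument("--mode", required=True, help="Either 'train' or 'test'.")
    p.add_argument("--data", required=True, help="Path to the data file.")
    p.add_argument("--model-file", required=True, dest="model_file",
                   help="Where to save (train) or load (test) the pickled model.")
    p.add_argument("--predictions-file", dest="predictions_file",
                   help="Where to write predictions in test mode.")
    p.add_argument("--algorithm", default="perceptron",
                   help="Which model to train, e.g. 'perceptron'.")
    p.add_argument("--online-learning-rate", type=float, default=1.0,
                   dest="online_learning_rate")
    p.add_argument("--online-training-iterations", type=int, default=5,
                   dest="online_training_iterations")
    return p.parse_args()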