Example #1
import matplotlib.pyplot as plt

# `models`, `MS` (the training-set sizes to sweep), `sample_d`, and
# `TestedModel` are assumed to be defined in the surrounding module.


def test():
    """
    Test Perceptron, SVM, and LDA accuracy as a function of training-set size.
    """
    tested_models = [
        TestedModel('Perceptron', models.Perceptron()),
        TestedModel('SVM', models.SVM()),
        TestedModel('LDA', models.LDA()),
    ]

    k = 10000         # test-set size
    iterations = 500  # repetitions per training size
    for m in MS:
        for _ in range(iterations):
            X, y = sample_d(m)      # fresh training sample of size m
            X_t, y_t = sample_d(k)  # fresh test sample

            for tested in tested_models:
                tested.model.fit(X, y)
                # score() is assumed to return a dict with an 'accuracy' key.
                score = tested.model.score(X_t, y_t)
                tested.add_accuracy(m, score['accuracy'])

    plt.figure()
    for tested in tested_models:
        plt.plot(MS, [tested.accuracy[m] for m in MS],
                 marker='.',
                 label=tested.name)
    plt.legend()
    plt.title('Training set size vs. accuracy')
    plt.xlabel('m')
    plt.ylabel('accuracy')
    plt.show()

    for tested in tested_models:
        print(tested.name, tested.accuracy)
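
The snippet depends on a TestedModel helper that is not shown. Below is a minimal sketch consistent with how it is used above; the class name, attributes, and methods come from the snippet, while averaging the repeated runs per m is an assumption.

from collections import defaultdict

class TestedModel:
    """Pair a display name with an estimator and collect accuracies per training size."""

    def __init__(self, name, model):
        self.name = name
        self.model = model
        self._scores = defaultdict(list)  # m -> accuracies from repeated runs

    def add_accuracy(self, m, score):
        self._scores[m].append(score)

    @property
    def accuracy(self):
        # Mean accuracy per training size m (averaging is an assumption).
        return {m: sum(s) / len(s) for m, s in self._scores.items()}
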
Example #2
def main(args):
    """ 
    Fit a model's parameters given the parameters specified in args.
    """
    # Load the data as a document-term matrix X plus its vocabulary.
    X, vocab = build_dtm(args.datadir, args.num_documents, args.max_words)

    # Build the variational-inference engine.
    inference = models.MeanFieldVariationalInference(
        num_topics=args.num_topics,
        num_docs=X.shape[0],
        num_words=X.shape[1],
        alpha=args.alpha,
        beta=args.beta,
        epsilon=args.epsilon)

    # Wrap the inference engine in an LDA model and fit it.
    model = models.LDA(inference=inference)
    model.fit(X=X,
              iterations=args.num_vi_iterations,
              estep_iterations=args.num_estep_iterations)

    # Predict topic assignments for words in the corpus.
    preds = model.predict(vocab=vocab, K=args.top_k)

    # Write the predictions to the output file, one per line.
    with open(args.predictions_file, 'w') as file:
        for pred in preds:
            file.write(pred + '\n')
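
main() only ever reads attributes of args, so it can be driven by a small argparse wrapper. The flag names below mirror those attributes; the defaults and the parse_args helper are illustrative assumptions, not part of the snippet.

import argparse

def parse_args():
    # argparse maps '--num-documents' to args.num_documents, and so on.
    parser = argparse.ArgumentParser(
        description='Fit LDA with mean-field variational inference.')
    parser.add_argument('--datadir', required=True)
    parser.add_argument('--num-documents', type=int, default=1000)
    parser.add_argument('--max-words', type=int, default=10000)
    parser.add_argument('--num-topics', type=int, default=10)
    parser.add_argument('--alpha', type=float, default=0.1)
    parser.add_argument('--beta', type=float, default=0.1)
    parser.add_argument('--epsilon', type=float, default=1e-4)
    parser.add_argument('--num-vi-iterations', type=int, default=50)
    parser.add_argument('--num-estep-iterations', type=int, default=20)
    parser.add_argument('--top-k', type=int, default=10)
    parser.add_argument('--predictions-file', default='predictions.txt')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())
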
Example #3
def main(args):
    """ 
    Fit a model's parameters given the parameters specified in args.
    """
    # Load the data as a document-term matrix X plus its vocabulary.
    X, vocab = build_dtm(args.data, args.num_documents)

    # Build the requested inference method.
    if args.inference.lower() == 'gibbs-sampling':
        inference = models.GibbsSampling(num_topics=args.num_topics,
                                         num_docs=X.shape[0],
                                         num_words=X.shape[1],
                                         alpha=args.alpha,
                                         beta=args.beta)
    elif args.inference.lower() == 'sum-product':
        inference = models.SumProduct(num_topics=args.num_topics,
                                      num_docs=X.shape[0],
                                      num_words=X.shape[1],
                                      num_nonzero=X.nnz,
                                      alpha=args.alpha,
                                      beta=args.beta)
    else:
        raise ValueError(
            'The method given by --inference is not yet supported.')

    # Wrap the inference engine in an LDA model and fit it.
    model = models.LDA(inference=inference)
    model.fit(X=X, iterations=args.iterations)

    # Predict topic assignments for words in the corpus.
    preds = model.predict(vocab=vocab, K=args.top_k)

    # Write the predictions to the output file, one per line.
    with open(args.predictions_file, 'w') as file:
        for pred in preds:
            file.write(pred + '\n')
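
The if/elif chain grows by one branch per inference method. The same dispatch can be table-driven; INFERENCE_BUILDERS and build_inference are names introduced here for illustration, with the constructor calls copied from the snippet.

INFERENCE_BUILDERS = {
    'gibbs-sampling': lambda X, args: models.GibbsSampling(
        num_topics=args.num_topics, num_docs=X.shape[0],
        num_words=X.shape[1], alpha=args.alpha, beta=args.beta),
    'sum-product': lambda X, args: models.SumProduct(
        num_topics=args.num_topics, num_docs=X.shape[0],
        num_words=X.shape[1], num_nonzero=X.nnz,
        alpha=args.alpha, beta=args.beta),
}

def build_inference(name, X, args):
    try:
        return INFERENCE_BUILDERS[name.lower()](X, args)
    except KeyError:
        raise ValueError(
            'The method given by --inference is not yet supported.')
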
Example #4
# The snippet begins mid-script: `pca`, `svd`, `data`, `test_data`, `y`, and
# `test_y` are assumed to be defined earlier in the file.
import numpy as np
import models
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import LinearSVC

# Reduce dimensionality with PCA; transform (rather than refit) the test data
# so both sets share the projection learned from the training data.
new_data2 = pca.fit_transform(data)
new_test_data2 = pca.transform(test_data)
print('pca dimension fit')

# Reduce dimensionality with truncated SVD; again, only transform the test data.
new_data = svd.fit_transform(data)
print('SVD fit')

new_test_data = svd.transform(test_data)
print('dimensions transformed')
print(new_data.shape)


lda_model = models.LDA()
s_lda = LinearDiscriminantAnalysis(solver='eigen')
# Use one estimator per feature space so the SVD fit is not overwritten
# by the PCA fit.
p_lda = LinearDiscriminantAnalysis(solver='eigen')

s_lda.fit(new_data, y)   # SVD features
p_lda.fit(new_data2, y)  # PCA features

lda_model.fit(new_data, y)
print('model fit')
pred = lda_model.predict(new_test_data)
# The custom models.LDA.score is assumed to compare predictions with labels.
print(lda_model.score(np.array(pred), test_y))
print(s_lda.score(new_test_data, test_y))
print(p_lda.score(new_test_data2, test_y))

# Linear SVM on the SVD features for comparison.
clf = LinearSVC(random_state=0, tol=1e-5)
clf.fit(new_data, y)
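
A scikit-learn Pipeline enforces the fit-on-train / transform-on-test discipline above automatically. Below is a minimal sketch for the SVD + linear-SVM path; n_components=100 is a placeholder value, not taken from the snippet.

from sklearn.pipeline import make_pipeline
from sklearn.decomposition import TruncatedSVD
from sklearn.svm import LinearSVC

# fit() learns the SVD projection and the classifier on the training data only;
# score() applies that same projection to the test data before predicting.
pipe = make_pipeline(TruncatedSVD(n_components=100),
                     LinearSVC(random_state=0, tol=1e-5))
pipe.fit(data, y)
print(pipe.score(test_data, test_y))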

Example #5
# Selecting the best models for classification.
# May 2019
import sys
sys.path.insert(0, '/Users/chirathhettiarachchi/tensorflow/clardia/preprocess')
import experiments as exp
import models
import json

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Read selected features
with open('../cache/selected_features.json') as f:
    data = json.load(f)
features = data['diabetes']
print("Selected Features: ", features)

x, y = exp.experiment_3(features)

# Hold out the last 30 rows for validation.
cutoff = x.shape[0] - 30
valx = x[cutoff:]
valy = y[cutoff:]
x = x[:cutoff]
y = y[:cutoff]

# Pass the held-out split for validation; the positional signature of the
# custom models.LDA is assumed to be (train_x, train_y, val_x, val_y, k).
model = models.LDA(x, y, valx, valy, 4)
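
As a sanity check on the custom wrapper, scikit-learn's LDA can be fit and scored on the same split; this estimator is an assumption for comparison, not what models.LDA does internally.

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

clf = LinearDiscriminantAnalysis()
clf.fit(x, y)
print('validation accuracy:', clf.score(valx, valy))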