Example #1
def knn_experiment():
    K = len(topics)
    dm = data.create_data_manager()
    ordered_topics = dm.order_topics(topics)

    print 'loading train data . . .'
    X_train, Y_train = dm.load_data('train')
    Y_train_slice = dm.slice_Y(Y_train, ordered_topics)

    print 'loading test data . . .'
    X_test, Y_test = dm.load_data('test')
    Y_gold = dm.slice_Y(Y_test, ordered_topics)

    final_results = {}

    print 'iterating over models . . .'
    for k in [1, 15, 31, 45, 61]:
        model = '%d-nearest neighbours' % k

        print 'now using model %s . . .' % model
        learner = knn.MultikNN(K, k)
        # K being the number of topics, and k being the number of neighbours
        learner.train(X_train, Y_train_slice)
        Y_pred = learner.batch_predict_classes(X_test)

        results = evaluate.per_topic_results(Y_pred, Y_gold)
        results_dict = {topic: result for (topic, result) in zip(ordered_topics, results)}

        pprint(results_dict)
        final_results[model] = results_dict

    print 'saving final results . . .'
    with open('results/knn_final_results.txt', 'w') as f:
        pprint(final_results, stream=f)
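
Because the final results are written out with pprint, the file contains a plain Python literal and can be loaded back with ast.literal_eval. A minimal sketch, assuming the file was produced by the function above and that the per-topic result values are built-in Python types (not custom objects):

import ast
from pprint import pprint

# Read the pprint-ed dict back in; literal_eval only accepts Python literals,
# so this assumes the stored results contain no custom objects.
with open('results/knn_final_results.txt') as f:
    final_results = ast.literal_eval(f.read())

# e.g. inspect the per-topic results for one of the models trained above
pprint(final_results['15-nearest neighbours'])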
Example #2
File: knn.py Project: aadah/ml_proj
def distance_matrix(self):
    print 'creating distance_matrix . . .'
    dm = data.create_data_manager()
    X_train, _ = dm.load_data('train')
    X_test, _ = dm.load_data('test')

    distances = np.empty((X_test.shape[0], X_train.shape[0]))
    for row in xrange(X_test.shape[0]):
        print 'row', row
        x = X_test[row]
        distances[row] = np.array([self._distance(x, X_train[col]) for col in xrange(X_train.shape[0])])
    np.save('distance_matrix.npy', distances)
    return distances
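
The double loop above makes n_test × n_train calls into self._distance. If _distance is the Euclidean distance (an assumption; its definition is not shown in this excerpt), the same matrix can be built with a few vectorized NumPy operations. A minimal sketch over dense arrays:

import numpy as np

def euclidean_distance_matrix(X_test, X_train):
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, computed for all pairs at once
    test_sq = (X_test ** 2).sum(axis=1)[:, np.newaxis]     # shape (n_test, 1)
    train_sq = (X_train ** 2).sum(axis=1)[np.newaxis, :]   # shape (1, n_train)
    cross = X_test.dot(X_train.T)                          # shape (n_test, n_train)
    sq = np.maximum(test_sq + train_sq - 2.0 * cross, 0.0)  # clip rounding noise
    return np.sqrt(sq)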
Example #3
def test2():
    print 'loading data . . .'
    dm = data.create_data_manager()
    X_train, Y_train = dm.load_data('train')
    Y_train_slice = dm.slice_Y(Y_train, topics)

    print 'training knn . . .'
    K = len(topics)
    learner = knn.MultikNN(K, 30)
    learner.train(X_train, Y_train_slice)

    print 'testing knn . . .'
    X_test, Y_test = dm.load_data('test')
    Y_test_slice = dm.slice_Y(Y_test, topics)
    errors = learner.classification_errors(X_test, Y_test_slice)

    return errors
Example #4
def test():
    # test run using a degree-3 polynomial kernel with C=1
    # for the ten main topics in Reuters
    dm = data.create_data_manager()

    print 'loading train data . . .'
    X_train, Y_train = dm.load_data('train')
    Y_train_slice = dm.slice_Y(Y_train, topics)

    print 'training svm . . .'
    K = len(topics)
    learner = svm2.MultiSVM(K, 1.0, 'poly', 3)
    learner.train(X_train, Y_train_slice)

    print 'loading test data . . .'
    X_test, Y_test = dm.load_data('test')
    Y_gold = dm.slice_Y(Y_test, topics)

    print 'predicting train . . .'
    Y_pred = learner.batch_predict_classes(X_train)
    
    print 'evaluating . . .'
    precision, recall = evaluate.precision_recall(Y_pred, Y_train_slice)
    f1 = evaluate.f_score(precision, recall)

    print 'Precision: %.3f' % precision
    print 'Recall: %.3f' % recall
    print 'F1: %.3f' % f1

    print 'predicting test . . .'
    Y_pred = learner.batch_predict_classes(X_test)
    
    print 'evaluating . . .'
    precision, recall = evaluate.precision_recall(Y_pred, Y_gold)
    f1 = evaluate.f_score(precision, recall)

    print 'Precision: %.3f' % precision
    print 'Recall: %.3f' % recall
    print 'F1: %.3f' % f1
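
For reference, one plausible way evaluate.precision_recall and evaluate.f_score might work over the 0/1 label matrices is a micro-averaged count of true positives, false positives, and false negatives. A sketch under that assumption only, since the evaluate module is not shown in these excerpts:

import numpy as np

def precision_recall(Y_pred, Y_gold):
    # micro-averaged over all (document, topic) cells of the 0/1 matrices
    tp = float(np.logical_and(Y_pred == 1, Y_gold == 1).sum())
    fp = float(np.logical_and(Y_pred == 1, Y_gold == 0).sum())
    fn = float(np.logical_and(Y_pred == 0, Y_gold == 1).sum())
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    return precision, recall

def f_score(precision, recall):
    # harmonic mean of precision and recall
    if precision + recall == 0.0:
        return 0.0
    return 2.0 * precision * recall / (precision + recall)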
Example #5
def svm_experiment(C, balance=''):
    K = len(topics)
    dm = data.create_data_manager()
    ordered_topics = dm.order_topics(topics)

    print 'loading train data . . .'
    X_train, Y_train = dm.load_data('train')
    Y_train_slice = dm.slice_Y(Y_train, ordered_topics)

    print 'loading test data . . .'
    X_test, Y_test = dm.load_data('test')
    Y_gold = dm.slice_Y(Y_test, ordered_topics)

    final_results = {}

    #print (Y_train_slice.sum(axis=0) + Y_train_slice.shape[0]).tolist()
    #raise Exception

    print 'iterating over models . . .'
    for model, kernel in svm_kernels:
        print 'now using model %s . . .' % model
        learner = svm.MultiSVM(K, C, kernel)
        learner.train(X_train, Y_train_slice,
                      ordered_topics,
                      balance=balance, # pass balance='' if you don't want to balance
                      max_per_class=3000)
        Y_pred = learner.batch_predict_classes(X_test)

        results = evaluate.per_topic_results(Y_pred, Y_gold)
        results_dict = {topic: result for (topic, result) in zip(ordered_topics, results)}

        pprint(results_dict)
        final_results[model] = results_dict

    print 'saving final results . . .'
    with open('results/svm_final_results_C_%.1f_%s.txt' % (C, balance), 'w') as f:
        pprint(final_results, stream=f)
Example #6
def __init__(self, XY=None):
    # load the default 'train' split unless an (X, Y) pair is supplied
    self.dm = data.create_data_manager()
    if XY is None:
        self.X, self.Y = self.dm.load_data('train')
    else:
        self.X, self.Y = XY