Beispiel #1
0
def test_build_extraction_dataset():
    # Builds the extraction dataset from the emails/P folder and verifies
    # that the resulting file can be loaded back as a SparseDataSet.
    dataset_path = os.path.join(TMP_DIR, 'extraction.data')
    emails_path = os.path.join(EMAILS_DIR, 'P')

    # remove any dataset file left over from a previous run
    if os.path.exists(dataset_path):
        os.remove(dataset_path)

    d.build_extraction_dataset(emails_path, dataset_path, 1)
    test_data = SparseDataSet(dataset_path, labelsColumn=-1)

    # the result is a loadable signature extraction dataset
    # 32 comes from 3 emails in emails/P folder, 11 lines checked to be
    # a signature, one email has only 10 lines
    eq_(test_data.size(), 32)
    # one dataset feature per entry returned by features('')
    eq_(len(features('')), test_data.numFeatures)
Beispiel #2
0
def test_build_extraction_dataset():
    # Regenerates TMP_DIR/extraction.data from the emails/P folder and
    # checks the size and feature count of the dataset loaded from it.
    out_file = os.path.join(TMP_DIR, 'extraction.data')

    # drop a previously generated file, if there is one
    if os.path.exists(out_file):
        os.remove(out_file)

    d.build_extraction_dataset(os.path.join(EMAILS_DIR, 'P'), out_file, 1)
    test_data = SparseDataSet(out_file, labelsColumn=-1)

    # the result is a loadable signature extraction dataset
    # 32 comes from 3 emails in emails/P folder, 11 lines checked to be
    # a signature, one email has only 10 lines
    eq_(test_data.size(), 32)
    # the dataset has as many features as features('') returns
    eq_(len(features('')), test_data.numFeatures)
Beispiel #3
0
def train(classifier, train_data_filename, save_classifier_filename=None):
    """Train ``classifier`` on a dataset file and optionally save it.

    The dataset is read from ``train_data_filename`` into a
    ``SparseDataSet`` (with ``labelsColumn=-1``) and handed to
    ``classifier.train``. When ``save_classifier_filename`` is truthy the
    trained classifier is also written there via ``classifier.save`` so
    that it can be loaded later.

    Returns the same ``classifier`` object that was passed in.
    """
    training_set = SparseDataSet(train_data_filename, labelsColumn=-1)
    classifier.train(training_set)

    # nothing to persist unless a target filename was given
    if not save_classifier_filename:
        return classifier

    classifier.save(save_classifier_filename)
    return classifier
Beispiel #4
0
def load(saved_classifier_filename, train_data_filename):
    """Restore a previously saved classifier.

    A fresh classifier is created with ``init()`` and its state is loaded
    from ``saved_classifier_filename``. The classifier should be loaded
    with the same data it was trained against, so the training dataset is
    read from ``train_data_filename`` and passed along to the load call.

    Returns the loaded classifier.
    """
    dataset = SparseDataSet(train_data_filename, labelsColumn=-1)
    restored = init()
    restored.load(saved_classifier_filename, dataset)
    return restored
def train_test(ds_path):
    """Cross-validate a Gaussian-kernel SVM on a dataset and pickle the result.

    The dataset at ``ds_path`` is loaded into a ``SparseDataSet``, a
    'gaussian' kernel with gamma=0.25 is attached, and an ``SVM`` with
    C=128 is evaluated with 2-fold cross-validation. The object returned
    by the cross-validation call is pickled to ``ds_path + '.pkl'`` and
    ``ds_path`` is printed once that is done.

    Args:
        ds_path: Path of the dataset file; also used as the prefix of the
            output pickle file.
    """
    data = SparseDataSet(ds_path)
    g, c, fold = 0.25, 128, 2

    ##################################################
    #### Statistical feature selection (currently disabled):
    #labels = np.array([int(n) for n in data.labels.L])
    #ranks = rank_feat(data.getMatrix().T, labels)
    #ranks = [(abs(r),i) for i, r in enumerate(ranks)]
    #ranks.sort()
    #ranks.reverse()
    #feats = [f[1] for f in ranks]
    #data.keepFeatures(feats[:2662])

    data.attachKernel('gaussian', gamma=g)
    s = SVM(C=c)
    r = s.cv(data, numFolds=fold)

    # The context manager closes the file even when pickling raises, which
    # the previous manual open()/close() pair did not guarantee.
    with open(ds_path + '.pkl', 'wb') as o:
        pickle.dump(r, o)

    # Parenthesized form prints the same text on Python 2 and Python 3.
    print(ds_path)
def train_test(ds_path):
    """Run SVM cross-validation on a dataset file and save the outcome.

    Loads ``ds_path`` as a ``SparseDataSet``, attaches a 'gaussian' kernel
    (gamma=0.25), cross-validates an ``SVM`` (C=128) using 2 folds, and
    pickles the cross-validation result to ``ds_path + '.pkl'``. The
    dataset path is printed after the result has been written.

    Args:
        ds_path: Path of the dataset file; the pickle file is written next
            to it with a ``.pkl`` suffix appended.
    """
    data = SparseDataSet(ds_path)
    g, c, fold = 0.25, 128, 2

    ##################################################
    #### Statistical feature selection (currently disabled):
    #labels = np.array([int(n) for n in data.labels.L])
    #ranks = rank_feat(data.getMatrix().T, labels)
    #ranks = [(abs(r),i) for i, r in enumerate(ranks)]
    #ranks.sort()
    #ranks.reverse()
    #feats = [f[1] for f in ranks]
    #data.keepFeatures(feats[:2662])

    data.attachKernel('gaussian', gamma=g)
    s = SVM(C=c)
    r = s.cv(data, numFolds=fold)

    # Use a context manager so the output file is closed even if
    # pickle.dump raises; the manual close() was skipped in that case.
    with open(ds_path + '.pkl', 'wb') as o:
        pickle.dump(r, o)

    # Parenthesized form prints the same text on Python 2 and Python 3.
    print(ds_path)
 def train(self, datalist, labelslist):
     """Train ``self.svminstance`` on ``datalist`` with labels ``labelslist``.

     Builds a ``SparseDataSet`` from the inputs, sets the SVM's ``C`` to
     20, attaches a 'gaussian' kernel to the dataset and trains on it.
     """
     dataset = SparseDataSet(datalist, L=labelslist)
     self.svminstance.C = 20
     # NOTE(review): 'degree' is usually a polynomial-kernel parameter;
     # confirm it has any effect together with the 'gaussian' kernel.
     dataset.attachKernel('gaussian', degree=5)
     self.svminstance.train(dataset)
Beispiel #8
0
def is_signature_line(line, sender, classifier):
    """Check whether ``line`` belongs to the signature.

    The line is turned into a pattern with ``build_pattern`` using the
    features derived from ``sender``, wrapped in a one-item
    ``SparseDataSet``, and scored by ``classifier.decisionFunc``.

    Returns the result of comparing that decision value with zero: true
    when it is positive, false otherwise.
    """
    pattern = build_pattern(line, features(sender))
    sample = SparseDataSet([pattern])
    # index 0 addresses the only pattern in the dataset
    score = classifier.decisionFunc(sample, 0)
    return score > 0
Beispiel #9
0
from PyML import SparseDataSet, SVM

__author__ = 'basir'

# Load the heart dataset from its sparse-format file.
# NOTE(review): labelsColumn=-1 presumably means the labels are in the
# last column -- confirm against the PyML documentation.
data = SparseDataSet('data/heartSparse.data', labelsColumn=-1)

# Cross-validate an SVM with default settings; 5 is passed as the second
# positional argument of cv().
svm = SVM()
res = svm.cv(data, 5)

# Print each item yielded by the result, then the result object itself.
for fold in res:
    print(fold)
print(res)
# print data
# help(sequenceData.spectrum_data)
 def trainforTD(self, datalist, labelslist):
     """Train ``self.svminstance`` on ``datalist`` labelled by ``labelslist``.

     The inputs are wrapped in a ``SparseDataSet`` and passed to the SVM's
     ``train`` method; no kernel is attached and ``C`` is not modified here.
     """
     labelled = SparseDataSet(datalist, L=labelslist)
     self.svminstance.train(labelled)
 def train(self, datalist, labelslist):
     """Fit ``self.svminstance`` to ``datalist`` using labels ``labelslist``.

     A ``SparseDataSet`` is created from the inputs, the SVM's ``C`` is
     set to 20, a 'gaussian' kernel is attached to the dataset, and the
     SVM is trained on it.
     """
     training_set = SparseDataSet(datalist, L=labelslist)
     self.svminstance.C = 20
     # NOTE(review): 'degree' is usually a polynomial-kernel parameter;
     # confirm it has any effect together with the 'gaussian' kernel.
     training_set.attachKernel('gaussian', degree=5)
     self.svminstance.train(training_set)
 def predict(self, datalist):
     """Return the first predicted label for ``datalist``.

     ``datalist`` is wrapped in a ``SparseDataSet`` and run through
     ``self.svminstance.test``; the first entry of the predicted labels
     reported by the result is returned.
     """
     dataset = SparseDataSet(datalist)
     outcome = self.svminstance.test(dataset)
     predicted = outcome.getPredictedLabels()
     return predicted[0]