def main_svmlight(): # copied: import svmlight import pdb training_data = syntheticData(30, 1) test_data = syntheticData(30, 1) #training_data = __import__('data').train0 #test_data = __import__('data').test0 print 'HERE 0' print 'training_data is', training_data print 'test_data is', test_data # train a model based on the data #pdb.set_trace() print 'HERE 1' model = svmlight.learn(training_data, type='regression', kernelType=2, verbosity=3) print 'HERE 2' # model data can be stored in the same format SVM-Light uses, for interoperability # with the binaries. svmlight.write_model(model, 'my_model.dat') print 'HERE 3' # classify the test data. this function returns a list of numbers, which represent # the classifications. #predictions = svmlight.classify(model, test_data) pdb.set_trace() predictions = svmlight.classify(model, training_data) print 'HERE 4' for p,example in zip(predictions, test_data): print 'pred %.8f, actual %.8f' % (p, example[0])
def training_model(ind, n=3): print "Loading features" load_features(n, fmap) print "Feature map size: %s" % fmap.getSize() print "Getting training data" train = [] for i in ind.get_pos_train_ind(): item = os.listdir("pos")[i] train.append( (1, [(fmap.getID(item[0]), item[1]) for item in ngrams.ngrams(n, open("pos/" + item).read()).items() if fmap.hasFeature(item[0])])) for i in ind.get_neg_train_ind(): item = os.listdir("neg")[i] train.append((-1, [ (fmap.getID(item[0]), item[1]) for item in ngrams.ngrams(n, open("neg/" + item).read()).items() if fmap.hasFeature(item[0]) ])) print "Training model" model = svmlight.learn(train, type='classification', verbosity=0) svmlight.write_model(model, 'my_model.dat') return model
def run_svm(article_count, feature_functions, kernel='polynomial', split=0.9, model_path='svm.model'):
    """Train and evaluate an SVM-Light DEF/INDEF classifier.

    Tokens tagged 'DEF' become positive examples and tokens tagged 'INDEF'
    negative ones; all other tokens are ignored. The data is split with
    ``bifurcate(data, split, shuffle=True)``, a model is trained on the first
    part, written to ``model_path`` and scored on the second part.

    Args:
        article_count: forwarded to ``preprocess_wsj``.
        feature_functions: forwarded to ``preprocess_wsj``.
        kernel: value for the ``kernel`` option of ``svmlight.learn``.
        split: forwarded to ``bifurcate``.
        model_path: file the trained model is written to.

    Returns:
        dict with keys ``total_articles_count``, ``total_token_count``,
        ``train_count``, ``test_count``, ``kernel``, ``correct``, ``wrong``
        and ``total`` (``correct + wrong``).
    """
    # https://bitbucket.org/wcauchois/pysvmlight
    articles, total_token_count = preprocess_wsj(article_count, feature_functions)

    dictionary = Dictionary()
    dictionary.add_one('ZZZZZ')  # so that no features are labeled 0

    data = []
    for article in articles:
        for sentence in article:
            for tag, token_features in zip(sentence.def_tags, sentence.data):
                # only use def / indef tokens
                if tag in ('DEF', 'INDEF'):
                    features = sorted(set(dictionary.add(token_features)))
                    # Build a real list: a bare zip() is a one-shot iterator
                    # under Python 3 and would not survive being stored.
                    feature_values = [(feature, 1) for feature in features]
                    data.append((+1 if tag == 'DEF' else -1, feature_values))

    train, test = bifurcate(data, split, shuffle=True)

    # for corpus, name in [(train, 'train'), (test, 'test')]:
    #     write_svm(corpus, 'wsj_svm-%s.data' % name)

    #####################
    # do svm in Python...
    model = svmlight.learn(train, type='classification', kernel=kernel)

    # svmlight.learn options
    # type: select between 'classification', 'regression', 'ranking' (preference ranking), and 'optimization'.
    # kernel: select between 'linear', 'polynomial', 'rbf', and 'sigmoid'.
    # verbosity: set the verbosity level (default 0).
    # C: trade-off between training error and margin.
    # poly_degree: parameter d in polynomial kernel.
    # rbf_gamma: parameter gamma in rbf kernel.
    # coef_lin
    # coef_const
    # costratio (corresponds to -j option to svm_learn)

    svmlight.write_model(model, model_path)

    gold_labels, test_feature_values = zip(*test)
    # total = len(gold_labels)

    # The gold label is replaced by 0 in every pair handed to classify().
    test_pairs = [(0, feature_values) for feature_values in test_feature_values]
    predictions = svmlight.classify(model, test_pairs)

    # Compare by sign: a positive score counts as 'DEF'.
    correct, wrong = matches(
        [(gold > 0) for gold in gold_labels],
        [(prediction > 0) for prediction in predictions])

    return dict(
        total_articles_count=len(articles),  # int
        total_token_count=total_token_count,  # int
        train_count=len(train),  # int
        test_count=len(test),  # int
        kernel=kernel,
        correct=correct,
        wrong=wrong,
        total=correct + wrong,
    )
def train(self):
    """Learn model weights from training instances.

    Trains an svmlight ranking model on ``self._training_data``, stores it in
    ``self._svmmodel``, then writes it to a temporary file that
    ``self._recover_weights`` reads back. The temporary file is always
    removed, even when writing or weight recovery fails.
    """
    # Train using svmlight
    self._svmmodel = svmlight.learn(self._training_data, type='ranking')

    # Write svmlight output to a temp file and recover weights
    modelout = NamedTemporaryFile(delete=False)
    # Only the path is needed; release our own handle before svmlight
    # opens the file by name.
    modelout.close()
    try:
        svmlight.write_model(self._svmmodel, modelout.name)
        self._recover_weights(modelout.name)
    finally:
        # delete=False makes cleanup our job; do it on the error path too.
        remove(modelout.name)
def trainall(): """ 使用svm训练0-9 10个数字样本 :return: """ for i in range(10): print "training ", i training_data = totrain(i) model = svmlight.learn(training_data, type="classification", verbosity=0) model_name = "model/" + str(i) svmlight.write_model(model, model_name) # write model """
def train(fnames, topics):
    """Fit an SVM-Light ranking model on the data built from *fnames*/*topics*.

    The training data is pretty-printed to ``TRAINING_DATA``, a linear
    ranking model is learned and written to ``ef_model.dat``, and finally
    ``ZC.dump_cache()`` is called. Returns None.
    """
    examples = init_train_data(fnames, topics)
    print('[ train ] ===================')

    # Keep a readable copy of exactly what the learner is given.
    with open(TRAINING_DATA, 'w') as dump_file:
        pprint.pprint(examples, dump_file)

    # train a model based on the data
    learn_options = {'type': 'ranking', 'kernel': 'linear', 'verbosity': 0}
    ranker = svmlight.learn(examples, **learn_options)

    # model data can be stored in the same format SVM-Light uses, for
    # interoperability with the binaries.
    svmlight.write_model(ranker, 'ef_model.dat')

    ZC.dump_cache()
def training_model(ind,n=3): print "Loading features" load_features(n,fmap) print "Feature map size: %s" % fmap.getSize() print "Getting training data" train = [] for i in ind.get_pos_train_ind(): item = os.listdir("pos")[i] train.append((1,[(fmap.getID(item[0]),item[1]) for item in ngrams.ngrams(n, open("pos/"+item).read()).items() if fmap.hasFeature(item[0])])) for i in ind.get_neg_train_ind(): item = os.listdir("neg")[i] train.append((-1,[(fmap.getID(item[0]),item[1]) for item in ngrams.ngrams(n, open("neg/"+item).read()).items() if fmap.hasFeature(item[0])])) print "Training model" model = svmlight.learn(train, type='classification', verbosity=0) svmlight.write_model(model, 'my_model.dat') return model
def train(fnames, topics):
    """Fit an SVM-Light ranking model on the data built from *fnames*/*topics*.

    The training data is pretty-printed to ``TRAINING_DATA``, a linear
    ranking model is learned and written to ``ef_model.dat``, and finally
    ``ZC.dump_cache()`` is called. Returns None.
    """
    examples = init_train_data(fnames, topics)
    print('[ train ] ===================')

    # Keep a readable copy of exactly what the learner is given.
    with open(TRAINING_DATA, 'w') as dump_file:
        pprint.pprint(examples, dump_file)

    # train a model based on the data
    learn_options = {'type': 'ranking', 'kernel': 'linear', 'verbosity': 0}
    ranker = svmlight.learn(examples, **learn_options)

    # model data can be stored in the same format SVM-Light uses, for
    # interoperability with the binaries.
    svmlight.write_model(ranker, 'ef_model.dat')

    ZC.dump_cache()
def test_svmlight(): training_data = [(1, [(1,2),(2,5),(3,6),(5,1),(4,2),(6,1)]), (1, [(1,2),(2,1),(3,4),(5,3),(4,1),(6,1)]), (1, [(1,2),(2,2),(3,4),(5,1),(4,1),(6,1)]), (1, [(1,2),(2,1),(3,3),(5,1),(4,1),(6,1)]), (-1, [(1,2),(2,1),(3,1),(5,3),(4,2),(6,1)]), (-1, [(1,1),(2,1),(3,1),(5,3),(4,1),(6,1)]), (-1, [(1,1),(2,2),(3,1),(5,3),(4,1),(6,1)]), (-1, [(1,1),(2,1),(3,1),(5,1),(4,3),(6,1)]), (-1, [(1,2),(2,1),(3,1),(5,2),(4,1),(6,5)]), (-1, [(7,10)])] test_data = [(0, [(1,2),(2,6),(3,4),(5,1),(4,1),(6,1)]), (0, [(1,2),(2,6),(3,4)])] model = svmlight.learn(training_data, type='classification', verbosity=0) svmlight.write_model(model, 'my_model.dat') predictions = svmlight.classify(model, test_data) for p in predictions: print '%.8f' % p
def test_svmlight(): training_data = [(1, [(1, 2), (2, 5), (3, 6), (5, 1), (4, 2), (6, 1)]), (1, [(1, 2), (2, 1), (3, 4), (5, 3), (4, 1), (6, 1)]), (1, [(1, 2), (2, 2), (3, 4), (5, 1), (4, 1), (6, 1)]), (1, [(1, 2), (2, 1), (3, 3), (5, 1), (4, 1), (6, 1)]), (-1, [(1, 2), (2, 1), (3, 1), (5, 3), (4, 2), (6, 1)]), (-1, [(1, 1), (2, 1), (3, 1), (5, 3), (4, 1), (6, 1)]), (-1, [(1, 1), (2, 2), (3, 1), (5, 3), (4, 1), (6, 1)]), (-1, [(1, 1), (2, 1), (3, 1), (5, 1), (4, 3), (6, 1)]), (-1, [(1, 2), (2, 1), (3, 1), (5, 2), (4, 1), (6, 5)]), (-1, [(7, 10)])] test_data = [(0, [(1, 2), (2, 6), (3, 4), (5, 1), (4, 1), (6, 1)]), (0, [(1, 2), (2, 6), (3, 4)])] model = svmlight.learn(training_data, type='classification', verbosity=0) svmlight.write_model(model, 'my_model.dat') predictions = svmlight.classify(model, test_data) for p in predictions: print '%.8f' % p
def main_svmlight(): # copied: import svmlight import pdb training_data = syntheticData(30, 1) test_data = syntheticData(30, 1) #training_data = __import__('data').train0 #test_data = __import__('data').test0 print 'HERE 0' print 'training_data is', training_data print 'test_data is', test_data # train a model based on the data #pdb.set_trace() print 'HERE 1' model = svmlight.learn(training_data, type='regression', kernelType=2, verbosity=3) print 'HERE 2' # model data can be stored in the same format SVM-Light uses, for interoperability # with the binaries. svmlight.write_model(model, 'my_model.dat') print 'HERE 3' # classify the test data. this function returns a list of numbers, which represent # the classifications. #predictions = svmlight.classify(model, test_data) pdb.set_trace() predictions = svmlight.classify(model, training_data) print 'HERE 4' for p, example in zip(predictions, test_data): print 'pred %.8f, actual %.8f' % (p, example[0])
def run_svm(article_count, feature_functions, kernel='polynomial', split=0.9, model_path='svm.model'):
    """Train and evaluate an SVM-Light DEF/INDEF classifier.

    Tokens tagged 'DEF' become positive examples and tokens tagged 'INDEF'
    negative ones; all other tokens are ignored. The data is split with
    ``bifurcate(data, split, shuffle=True)``, a model is trained on the first
    part, written to ``model_path`` and scored on the second part.

    Args:
        article_count: forwarded to ``preprocess_wsj``.
        feature_functions: forwarded to ``preprocess_wsj``.
        kernel: value for the ``kernel`` option of ``svmlight.learn``.
        split: forwarded to ``bifurcate``.
        model_path: file the trained model is written to.

    Returns:
        dict with keys ``total_articles_count``, ``total_token_count``,
        ``train_count``, ``test_count``, ``kernel``, ``correct``, ``wrong``
        and ``total`` (``correct + wrong``).
    """
    # https://bitbucket.org/wcauchois/pysvmlight
    articles, total_token_count = preprocess_wsj(article_count, feature_functions)

    dictionary = Dictionary()
    dictionary.add_one('ZZZZZ')  # so that no features are labeled 0

    data = []
    for article in articles:
        for sentence in article:
            for tag, token_features in zip(sentence.def_tags, sentence.data):
                # only use def / indef tokens
                if tag in ('DEF', 'INDEF'):
                    features = sorted(set(dictionary.add(token_features)))
                    # Build a real list: a bare zip() is a one-shot iterator
                    # under Python 3 and would not survive being stored.
                    feature_values = [(feature, 1) for feature in features]
                    data.append((+1 if tag == 'DEF' else -1, feature_values))

    train, test = bifurcate(data, split, shuffle=True)

    # for corpus, name in [(train, 'train'), (test, 'test')]:
    #     write_svm(corpus, 'wsj_svm-%s.data' % name)

    #####################
    # do svm in Python...
    model = svmlight.learn(train, type='classification', kernel=kernel)

    # svmlight.learn options
    # type: select between 'classification', 'regression', 'ranking' (preference ranking), and 'optimization'.
    # kernel: select between 'linear', 'polynomial', 'rbf', and 'sigmoid'.
    # verbosity: set the verbosity level (default 0).
    # C: trade-off between training error and margin.
    # poly_degree: parameter d in polynomial kernel.
    # rbf_gamma: parameter gamma in rbf kernel.
    # coef_lin
    # coef_const
    # costratio (corresponds to -j option to svm_learn)

    svmlight.write_model(model, model_path)

    gold_labels, test_feature_values = zip(*test)
    # total = len(gold_labels)

    # The gold label is replaced by 0 in every pair handed to classify().
    test_pairs = [(0, feature_values) for feature_values in test_feature_values]
    predictions = svmlight.classify(model, test_pairs)

    # Compare by sign: a positive score counts as 'DEF'.
    correct, wrong = matches(
        [(gold > 0) for gold in gold_labels],
        [(prediction > 0) for prediction in predictions])

    return dict(
        total_articles_count=len(articles),  # int
        total_token_count=total_token_count,  # int
        train_count=len(train),  # int
        test_count=len(test),  # int
        kernel=kernel,
        correct=correct,
        wrong=wrong,
        total=correct + wrong,
    )
def save_classifier(clf, clf_i):
    """Write *clf* in SVM-Light format to ``<args.output_directory>/<clf_i>``.

    The output directory is created first when it does not exist yet.
    """
    out_dir = args.output_directory
    model_path = os.path.join(out_dir, str(clf_i))

    directory_missing = not os.path.exists(out_dir)
    if directory_missing:
        os.makedirs(out_dir)

    svmlight.write_model(clf, model_path)
if val in percentages and percentages[val]: print " Progress: %i %s" %(val, "%") percentages[val] = False try: source = open(directory + filename, 'r') train_type = int(source.readline()) train_num_dimensions = int(source.readline()) train_dimensions = source.readline().strip().split() source.close() num = 1 vals=[] for val in train_dimensions: vals.append((num, float(val))) num += 1 training_data.append((train_type, vals)) except Exception as e: print "ERROR:", e break counter += 1 print "Imported:", len(training_data), "\n" print "Building Model" model = svmlight.learn(training_data, type='classification', verbosity=0) print "Write Model" svmlight.write_model(model, 'svm-model.dat')
# Example script: learn a classification model, save it, and print the
# score of every test example.
import svmlight

# Both data sets are attributes of the module named 'data', loaded by name.
training_data = __import__('data').train0
test_data = __import__('data').test0

# train a model based on the data
model = svmlight.learn(training_data, type='classification', verbosity=0)

# model data can be stored in the same format SVM-Light uses, for
# interoperability with the binaries.
svmlight.write_model(model, 'my_model.dat')

# classify the test data. this function returns a list of numbers, which
# represent the classifications.
predictions = svmlight.classify(model, test_data)
for p in predictions:
    # one prediction per line, eight decimals
    print('%.8f' % p)
import anglepy.ndict as ndict from anglepy.misc import lazytheanofunc import svmlight ''' To install pysvmlight (on MAC): 1) cd to pysvmlight dir > export CFLAGS=-Qunused-arguments > export CPPFLAGS=-Qunused-arguments > chmod +x setup.py > ./setup.py build > sudo ./setup.py install ''' ''' ===> Example from pysvmlight doc: # train a model based on the data model = svmlight.learn(training_data, type='classification', verbosity=0) # model data can be stored in the same format SVM-Light uses, for interoperability # with the binaries. svmlight.write_model(model, 'my_model.dat') # classify the test data. this function returns a list of numbers, which represent # the classifications. predictions = svmlight.classify(model, test_data) for p in predictions: print '%.8f' % p '''
# NOTE(review): test_data is loaded but not used by the statements below,
# which classify the hand-written `test` list instead.
test_data = __import__('data').test0

# Hand-written examples in the form (label, [(feature_id, value), ...]).
train = [
    (1,[(1,0.5),(2,0.125)]),
    (1,[(1,0.25),(2,0.125)]),
    (1,[(1,1.75),(2,0.0)]),
    (0,[(1,0.125),(2,0.25)]),
    (0,[(1,0.5),(2,1)]),
    (0,[(1,0.3),(2,0.4)])]
    #(3,[(1,0.125),(2,0.2)]),
    #(3,[(1,0),(2,0)]),
    #(3,[(1,1),(2,1.1)])]

test = [
    (1,[(1,1.0),(2,0.1)]),
    (-1,[(1,0.1),(2,2.1)])]

# train a model based on the data
model = svmlight.learn(train, type='ranking', verbosity=0)

# model data can be stored in the same format SVM-Light uses, for interoperability
# with the binaries.
svmlight.write_model(model, 'my_model.dat')

# classify the test data. this function returns a list of numbers, which represent
# the classifications.
predictions = svmlight.classify(model, test)
for p in predictions:
    # one prediction per line, eight decimals
    print '%.8f' % p
def train_binary(self, x, y):
    """Learn an RBF-kernel SVM-Light classifier from (x, y) and save it.

    ``x`` and ``y`` are converted with ``svmlfeaturisexy``; ``self.C`` and
    ``self.gamma`` supply the ``C`` and ``rbf_gamma`` options. The model is
    written to ``tsvm_mnist.dat``; nothing is returned.
    """
    examples = svmlfeaturisexy(x, y)
    learn_options = dict(
        type='classification',
        verbosity=0,
        kernel='rbf',
        C=self.C,
        rbf_gamma=self.gamma,
    )
    classifier = svmlight.learn(examples, **learn_options)
    svmlight.write_model(classifier, 'tsvm_mnist.dat')
import collections as C import anglepy as ap import anglepy.ndict as ndict from anglepy.misc import lazytheanofunc import svmlight ''' To install pysvmlight (on MAC): 1) cd to pysvmlight dir > export CFLAGS=-Qunused-arguments > export CPPFLAGS=-Qunused-arguments > chmod +x setup.py > ./setup.py build > sudo ./setup.py install ''' ''' ===> Example from pysvmlight doc: # train a model based on the data model = svmlight.learn(training_data, type='classification', verbosity=0) # model data can be stored in the same format SVM-Light uses, for interoperability # with the binaries. svmlight.write_model(model, 'my_model.dat') # classify the test data. this function returns a list of numbers, which represent # the classifications. predictions = svmlight.classify(model, test_data) for p in predictions: print '%.8f' % p '''
print '%s f-measure: %f' % (label, f_measure(ref, test) or 0) if args.show_most_informative and hasattr(classifier, 'show_most_informative_features') and not (args.multi and args.binary) and not args.cross_fold: print '%d most informative features' % args.show_most_informative classifier.show_most_informative_features(args.show_most_informative) ############## ## pickling ## ############## if not args.no_pickle and not args.cross_fold: if args.filename: fname = os.path.expanduser(args.filename) else: name = '%s_%s.pickle' % (args.corpus, '_'.join(args.classifier)) fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name.lower()) # We can't persist the SVM classifier directly since it contains # C-objects. We need to save the model separately. if classifier.__class__.__name__ == "SvmClassifier": import svmlight model_name = '%s_%s_model.dat' % (args.corpus, '_'.join(args.classifier)) model_fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), model_name.lower()) svmlight.write_model(classifier._model, model_fname) # Remove the model from the classifier so it can be saved. classifier._model = None dump_object(classifier, fname, trace=args.trace)
train = svm_parse('aux/train_' + ts + '.txt') aux = svm_parse('aux/test_' + ts + '.txt') test, val = adapt_to_svmlight_format(aux) print("Training it=", it, "cost-factor=", cost_factor + 1) model = svmlight.learn(list(train), type='classification', verbosity=0, costratio=cost_factor + 1) ## costratio = cost-factor if dump == "yes": svmlight.write_model( model, "models/model_" + dataset + "_" + features + "_it" + str(it) + "_cost_fact" + str(cost_factor + 1) + "_" + ts + ".dat") predictions = svmlight.classify(model, test) print("Predicting it=", it, "cost-factor=", cost_factor + 1) tp, tn, fp, fn = evaluate(predictions) accuracies.append( weighted_accuracy(cost_factor + 1, tn, tp, fn, fp) * 100) predictions = np.array(predictions) predictions[predictions < 0] = -1 predictions[predictions > 0] = 1 f1_micro.append( f1_score(val, predictions, average='micro') ) # micro: calculates metrics totally by counting the total true positives, false negatives and false positives cl = f1_score(val, predictions,
nskipped = 1 if len(sentences) > 1: # because there have to be transitions docModel = DummyDocModel(sentences) grid = TextrazorEntityGrid(docModel.cleanSentences(), 1, textrazorEntities, textrazorSentences) if grid.valid and len(grid.matrixIndices) > 0: grid.printMatrix() featureVector = FeatureVector(grid, clusterIndex) featureVector.printVector() featureVector.printVectorWithIndices() vector = featureVector.getVector(qualityScore) featureVectors.append(vector) docIndex += 1 else: print "SKIPPING (not enough sentences) %s, nskipped=(%d)" % (fileName, nskipped) nskipped += 1 else: print "SKIPPING (no pickle file)%s, nskipped=(%d)" % (fileName, nskipped) nskipped += 1 # pickleFile = open("../cache/svmlightCache/featureVectors.pickle", 'wb') # pickle.dump(featureVectors, pickleFile, pickle.HIGHEST_PROTOCOL) # pickleFile.close() # if docIndex >= maxN: # break numDocsTried += 1 clusterIndex += 1 # now train on the data model = svmlight.learn(featureVectors, type='ranking', verbosity=0) svmlight.write_model(model, '../cache/svmlightCache/svmlightModel.dat')
def svm():
    """Score annotated lines with a sentiment dictionary and train an SVM on them.

    Each (line, sentiment) pair from 'sent_400_wspos.pickle' is reduced to a
    summed (posScore, negScore) pair, grouped by sentiment label, turned into
    svmlight examples with features 1 and 2, split per label into train/test
    parts, and used to learn and apply a classification model that is written
    to 'my_model1.dat'. Nothing is returned.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source — confirm it against the original file.
    """
    # load the sentiment score file with (word,pos) -> (posScore,negScore) dictionary
    # and the (review,sentiment) pair list
    # NOTE(review): neither file object is closed explicitly.
    synDict = pickle.load(open('sentiment_score.pickle','rb'))
    annot = pickle.load(open('sent_400_wspos.pickle','rb'))
    # These four counters are not read anywhere below.
    poscount = bothcount = 0
    posTot = 0
    bothTot = 0
    print annot
    # 0 -> pos, 1 -> neg, 2 -> both, 3 -> neut
    data = {'pos':[],'neg':[],'both':[],'neut':[]}
    strToNum = {'pos':0,'neg':1,'both':2,'neut':3}
    for line,sent in annot:
        score = (0,0)
        hits = 0 # number of words found in dictionary, for scaling
        # Rebuild the plain sentence from the part of each token before the first '#'.
        string = ''
        for word in line.split():
            string += word.split('#')[0]+' '
        neg = negate.negating(string.strip(' '))
        # catch empty case, simpler than re-pickling
        if neg == []:
            continue
        # calculate (posScore, negScore) for each word in line
        for i,word in enumerate(line.split()):
            tri = word.split('#')
            tempscore = (0,0)
            # Only tokens with exactly three '#'-separated parts are looked up.
            if len(tri) == 3:
                hits += 1
                pair = (tri[0]+'#'+tri[2],tri[1])
                tempscore = synDict.get(pair,(0,0))
            if 'NOT' in neg[i]:
                tempscore = (tempscore[1],tempscore[0]) # set to reverse value b/c inverted meaning
            score = (score[0]+tempscore[0],score[1]+tempscore[1]) # add tempscore to score
        data[sent.strip(' ')].append(score)
    featList = []
    # convert to feature lists
    # Each score (a, b) becomes (numeric label, [(1, a), (2, b)]).
    for key in data.keys():
        featList.append(map(lambda (a,b): (strToNum[key],[(1,a),(2,b)]),data[key]))
    # construct test and train sets as fractions of featList
    # First three quarters of every label's list go to train, the rest to test
    # (`/` on ints is integer division in this Python 2 code).
    train = featList[0][:3*len(featList[0])/4]+featList[1][:3*len(featList[1])/4]+featList[2][:3*len(featList[2])/4]+featList[3][:3*len(featList[3])/4]
    test = featList[0][3*len(featList[0])/4:]+featList[1][3*len(featList[1])/4:]+featList[2][3*len(featList[2])/4:]+featList[3][3*len(featList[3])/4:]
    for element in train:
        print element
    # train and test model
    # NOTE(review): labels 0-3 are passed with type='classification' —
    # confirm svmlight treats these four labels as intended.
    model = svmlight.learn(train, type='classification', verbosity=0)
    svmlight.write_model(model, 'my_model1.dat')
    predictions = svmlight.classify(model, test)
    # The predictions are currently discarded; the print is disabled.
    for p in predictions:
        #print '%.8f' % p
        pass
val = int(float(counter) / len(filenames) * 100) if val in percentages and percentages[val]: print " Progress: %i %s" % (val, "%") percentages[val] = False try: source = open(directory + filename, 'r') train_type = int(source.readline()) train_num_dimensions = int(source.readline()) train_dimensions = source.readline().strip().split() source.close() num = 1 vals = [] for val in train_dimensions: vals.append((num, float(val))) num += 1 training_data.append((train_type, vals)) except Exception as e: print "ERROR:", e break counter += 1 print "Imported:", len(training_data), "\n" print "Building Model" model = svmlight.learn(training_data, type='classification', verbosity=0) print "Write Model" svmlight.write_model(model, 'svm-model.dat')