Ejemplo n.º 1
0
    def train(self, features, grades):
        """Fit a ranking SVM to the essays and build the score-to-grade curve.

        features - 2-D numpy array/matrix with one row per essay
        grades - sequence with one grade per essay

        Sets self.min_grade, self.max_grade, self.model, self.grade_probs
        (empirical frequency of each grade) and self.curve (built from the
        model's scores on the training features and self.grade_probs).
        """
        self.min_grade = min(grades)
        self.max_grade = max(grades)
        essay_total, _ = features.shape

        # svmlight format: [(label, [(feature, value), ...], query_id), ...]
        # Feature ids are 1-based and every essay shares query id 1.
        training_data = [
            (grade, list(enumerate(features[row, :], 1)), 1)
            for row, grade in enumerate(grades)
        ]

        self.model = svmlight.learn(training_data, type='ranking', verbosity=0, C=100)

        # Empirical distribution of the grades seen in training.
        grade_counts = {}
        for grade in grades:
            grade_counts[grade] = grade_counts.get(grade, 0) + 1
        self.grade_probs = dict(
            (grade, count / float(essay_total))
            for grade, count in grade_counts.iteritems())

        training_scores = self.classify_rank_svm(features)
        self.curve = Curve(training_scores, probs=self.grade_probs)
Ejemplo n.º 2
0
    def _train_classifier(self):
        print "Training classifier..."
        docs = []  # list of strings
        for w in self.pos_sites:
            docs.extend([p.get_text(self.text_type) for p in w])
        for w in self.neg_sites:
            docs.extend([p.get_text(self.text_type) for p in w])
        self.vectorizer.fit(docs)
        print self.vectorizer.vocabulary_

        pos = np.array([
            w.get_vsm(self.vectorizer, self.text_type) for w in self.pos_sites
        ])
        pos = self._convert_to_svmlight(pos, 1)
        neg = np.array([
            w.get_vsm(self.vectorizer, self.text_type) for w in self.neg_sites
        ])
        neg = self._convert_to_svmlight(neg, -1)
        train = pos + neg
        print "Number of pos: ", len(pos)
        print "Number of neg: ", len(neg)

        #self.clf = svmlight.learn(train, type='classification', verbosity=0, cost_ratio=0.10, C=10)
        self.clf = svmlight.learn(train,
                                  type='classification',
                                  C=0.01,
                                  cost_ratio=2.0,
                                  verbosity=0)
Ejemplo n.º 3
0
def _classify(data, cond_info):
    """Run leave-one-run-out classification and return the mean accuracy.

    Each row of ``cond_info`` (one run) is held out in turn: a linear SVM is
    trained on the remaining runs and the sign of its predictions on the
    held-out run is compared with that run's ``cond_info`` row. The result is
    the mean percentage accuracy over all runs.
    """
    n_runs, _ = cond_info.shape
    run_ids = np.arange(n_runs)

    # Start from NaN so a run that was never scored is caught by the assert.
    acc = np.full(n_runs, np.NAN)

    for i_test_run in xrange(n_runs):
        # exclude the test run from the training set
        i_train = run_ids[run_ids != i_test_run]

        train_data = _format_data(data[:, i_train, :], cond_info[i_train, :])
        model = svmlight.learn(train_data,
                               type="classification",
                               kernel="linear")

        test_data = _format_data(data[:, [i_test_run], :],
                                 cond_info[[i_test_run], :])
        pred = svmlight.classify(model, test_data)

        n_correct = (np.sign(pred) == cond_info[i_test_run, :]).sum()
        acc[i_test_run] = float(n_correct) / len(pred) * 100.0

    assert not np.isnan(acc).any()

    return np.mean(acc)
Ejemplo n.º 4
0
def main_svmlight():
    # copied:
    import svmlight
    import pdb
    
    training_data = syntheticData(30, 1)
    test_data     = syntheticData(30, 1)
    #training_data = __import__('data').train0
    #test_data = __import__('data').test0

    print 'HERE 0'
    print 'training_data is', training_data
    print 'test_data is', test_data

    # train a model based on the data
    #pdb.set_trace()
    print 'HERE 1'
    model = svmlight.learn(training_data, type='regression', kernelType=2, verbosity=3)
    print 'HERE 2'

    # model data can be stored in the same format SVM-Light uses, for interoperability
    # with the binaries.
    svmlight.write_model(model, 'my_model.dat')
    print 'HERE 3'

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.
    #predictions = svmlight.classify(model, test_data)
    pdb.set_trace()
    predictions = svmlight.classify(model, training_data)
    print 'HERE 4'
    
    for p,example in zip(predictions, test_data):
        print 'pred %.8f, actual %.8f' % (p, example[0])
Ejemplo n.º 5
0
def training_model(ind, n=3):
    print "Loading features"
    load_features(n, fmap)
    print "Feature map size: %s" % fmap.getSize()
    print "Getting training data"
    train = []
    for i in ind.get_pos_train_ind():
        item = os.listdir("pos")[i]
        train.append(
            (1, [(fmap.getID(item[0]), item[1])
                 for item in ngrams.ngrams(n,
                                           open("pos/" + item).read()).items()
                 if fmap.hasFeature(item[0])]))
    for i in ind.get_neg_train_ind():
        item = os.listdir("neg")[i]
        train.append((-1, [
            (fmap.getID(item[0]), item[1])
            for item in ngrams.ngrams(n,
                                      open("neg/" + item).read()).items()
            if fmap.hasFeature(item[0])
        ]))
    print "Training model"
    model = svmlight.learn(train, type='classification', verbosity=0)
    svmlight.write_model(model, 'my_model.dat')
    return model
Ejemplo n.º 6
0
    def train(self, docs):
        """Train an SVM on bag-of-words features of the labelled documents.

        docs -- re-iterable collection of records where doc[0] is "POS" or
                "NEG" and doc[2] is the document's word list

        Every distinct word gets an id taken from self.curr_id (which keeps
        counting upwards); when self.presence is set each word counts once
        per document. The trained model is stored in self.model.
        """
        pos_word_lists = [doc[2] for doc in docs if doc[0] == "POS"]
        neg_word_lists = [doc[2] for doc in docs if doc[0] == "NEG"]

        self.word_ids = {}
        for token in set(flatten(pos_word_lists) + flatten(neg_word_lists)):
            self.word_ids[token] = self.curr_id
            self.curr_id += 1

        features = []
        for label, word_lists in ((1, pos_word_lists), (-1, neg_word_lists)):
            for wordlist in word_lists:
                # Presence mode collapses repeated words to a single count.
                tokens = set(wordlist) if self.presence else wordlist
                counts = Counter(tokens)
                featureVec = sorted(
                    ((self.word_ids.get(word), v)
                     for word, v in counts.iteritems()),
                    key=lambda pair: pair[0])
                features.append((label, featureVec))
        self.model = svmlight.learn(features)
Ejemplo n.º 7
0
	def train_multi_onevsall(self, x, y, unlab_x, strategy=1):

		num_classes = int(np.max(y) + 1)
		print x.shape, y.shape
		x, y = x[1:25000:2], y[1:25000:2]
		unlab_x = unlab_x[1:25000:1000]

		print "labelled points number:", x.shape[0]
		print "unlabelled points number:", unlab_x.shape[0]

		x_feat = self.svmlfeaturise(x)
		unlab_x_feat = self.svmlfeaturise(unlab_x)

		for i in xrange(num_classes):
			y_feat = (y==i)*2 - 1
			feats = []
			lab_feats = []
			unlab_feats = []
			for j in xrange(len(x_feat)):
				lab_feats.append((y_feat[j], x_feat[j]))

			# if unlab_x != None:
			# 	for j in xrange(len(unlab_x_feat)):
			# 		unlab_feats.append((0, unlab_x_feat[j]))

			feats = lab_feats + unlab_feats
			print "======SVM Model Training started======="
			model = svmlight.learn(feats, type='classification', verbosity=0, kernel='rbf', C=self.C, rbf_gamma=self.gamma)
			print i
			print "======SVM Model Training terminated======"
			self.models.append(model)
		self.trained = True
Ejemplo n.º 8
0
 def learnModel(self, X, y):
     """Train a ranking SVM on (X, y) and store it in self.model."""
     training_examples = self.__createData(X, y)
     # NOTE(review): the pysvmlight README names this option rbf_gamma;
     # confirm that the svmlight build in use accepts ``gamma``.
     learn_options = dict(type='ranking',
                          verbosity=0,
                          kernel=self.kernel,
                          C=self.C,
                          gamma=self.gamma)
     self.model = svmlight.learn(training_examples, **learn_options)
Ejemplo n.º 9
0
 def fit_binary(self, X, y):
     '''
     Train a binary SVM on X and store it in self.model.

     Assume 'y' holds only 0 and 1. A copy of 'y' in which every value
     <= 0 is replaced by -1 is used as the label vector; 'y' itself is
     left untouched.
     '''
     label = np.copy(y)
     non_positive = label <= 0
     label[non_positive] = -1
     self.model = svmlight.learn(self.toSvmlight(X, label))
Ejemplo n.º 10
0
 def _train_with_values(self, dataset, poly_degree=2, C=100):
   self.svm_list = []
   for month_ind in range(12):
     self._format_training_data(dataset, month_ind)
     if self.debug:
       print "Learning on month %d of 12 with %d samples..." %(month_ind+1, len(self.formatted_data))
     
     self.svm_list.append( svmlight.learn(self.formatted_data, type='regression', kernel='polynomial', poly_degree=poly_degree, C=C, verbosity=0) )
Ejemplo n.º 11
0
def run_svm(article_count, feature_functions, kernel='polynomial', split=0.9, model_path='svm.model'):
    """Train and evaluate an SVM separating 'DEF' tokens from 'INDEF' tokens.

    Tokens tagged 'DEF' are labelled +1 and tokens tagged 'INDEF' are
    labelled -1; all other tokens are ignored. Each kept token becomes a
    sparse vector of its distinct feature ids, each with value 1. The data is
    split by ``bifurcate``, the model is written to ``model_path``, and a
    dict of counts (articles, tokens, train/test sizes, correct/wrong/total)
    plus the kernel name is returned.
    """
    # https://bitbucket.org/wcauchois/pysvmlight
    articles, total_token_count = preprocess_wsj(article_count, feature_functions)

    dictionary = Dictionary()
    dictionary.add_one('ZZZZZ')  # so that no features are labeled 0
    data = []
    for article in articles:
        for sentence in article:
            for tag, token_features in zip(sentence.def_tags, sentence.data):
                # only use def / indef tokens
                if tag not in ('DEF', 'INDEF'):
                    continue
                feature_ids = sorted(set(dictionary.add(token_features)))
                label = +1 if tag == 'DEF' else -1
                data.append((label, zip(feature_ids, [1] * len(feature_ids))))

    train, test = bifurcate(data, split, shuffle=True)

    # svmlight.learn options
    # type: select between 'classification', 'regression', 'ranking' (preference ranking), and 'optimization'.
    # kernel: select between 'linear', 'polynomial', 'rbf', and 'sigmoid'.
    # verbosity: set the verbosity level (default 0).
    # C: trade-off between training error and margin.
    # poly_degree: parameter d in polynomial kernel.
    # rbf_gamma: parameter gamma in rbf kernel.
    # coef_lin
    # coef_const
    # costratio (corresponds to -j option to svm_learn)
    model = svmlight.learn(train, type='classification', kernel=kernel)
    svmlight.write_model(model, model_path)

    gold_labels, test_feature_values = zip(*test)

    # The label slot is a placeholder (0) at prediction time.
    test_pairs = [(0, vector) for vector in test_feature_values]
    predictions = svmlight.classify(model, test_pairs)

    gold_positive = [(gold > 0) for gold in gold_labels]
    predicted_positive = [(prediction > 0) for prediction in predictions]
    correct, wrong = matches(gold_positive, predicted_positive)

    return {
        'total_articles_count': len(articles),  # int
        'total_token_count': total_token_count,  # int
        'train_count': len(train),  # int
        'test_count': len(test),  # int
        'kernel': kernel,
        'correct': correct,
        'wrong': wrong,
        'total': correct + wrong,
    }
Ejemplo n.º 12
0
def runSVMLight(trainName,testName, kerneltype, c_param = 1.0, gamma_param = 1.0, verbosity = 0):
    """
    converts data to python format only if not already in python format 
    (files in python format are of type list, otherwise they are filenames)
    
    inputs: trainName, either the training data in svm-light format or the name of the training data file in LIBSVM/sparse format
            testName, either the test data in svm-light format or the name of the test data file in LIBSVM/sparse format
            kerneltype, (str)the type of kernel (linear, polynomial, sigmoid, rbf, custom)
            c_param, the C parameter (default 1)
            gamma_param, the gamma parameter (default 1)
            verbosity, 0, 1, or 2 for less or more information (default 0)
    
    outputs: (positiveAccuracy, negativeAccuracy, accuracy)
    """
    if type(trainName) == list:
        trainingData = trainName
    else:
        trainingData = sparseToList(trainName)
        
    
    if type(testName) == list:
        testData = testName
    else:
        testData = sparseToList(testName)
        
    if verbosity == 2:
        print "Training svm......."

    # train a model based on the data
    model = svmlight.learn(trainingData, type='classification', verbosity=2, kernel=kerneltype, C = c_param, rbf_gamma = gamma_param )
    
    # model data can be stored in the same format SVM-Light uses, for interoperability
    # with the binaries.
    
    # if type(trainName) == list:
    #     svmlight.write_model(model, time.strftime('%Y-%m-%d-')+datetime.datetime.now().strftime('%H%M%S%f')+'_model.dat')
    # else:
    #     svmlight.write_model(model, trainName[:-4]+'_model.dat')
    
    if verbosity == 2:
        print "Classifying........"

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.
    predictions = svmlight.classify(model, testData)
    
    # for p in predictions:
    #     print '%.8f' % p
    
    correctLabels = correctLabelRemove(testData)

    # print 'Predictions:'
    # print predictions
    # print 'Correct Labels:'
    # print correctLabels

    return predictionCompare(predictions, correctLabels, verbosity)
Ejemplo n.º 13
0
def performSVMClassification(trainingData, testData):
    """Train an SVM on trainingData and judge every document in testData.

    Documents are (sentiment, fileName, features) records; "POS" maps to
    label 1 and anything else to -1. Features are encoded by presence (each
    distinct feature has value 1). Returns a list of
    (predicted "POS"/"NEG", actual sentiment, fileName) tuples.
    """
    featureIndices = getFeatureIndices(trainingData + testData)

    def presenceVector(words):
        # Our baseline should always use presence: every distinct feature
        # gets value 1, and pairs are ordered by feature index.
        present = dict.fromkeys(words, 1)
        return sorted(((featureIndices[k], v) for k, v in present.items()),
                      key=lambda x: x[0])

    formattedTrainingData = []
    for doc in trainingData:
        featureVector = presenceVector(doc[2])
        sentimentVal = 1 if doc[0] == "POS" else -1
        formattedTrainingData.append((sentimentVal, featureVector))

    model = svmlight.learn(formattedTrainingData)

    # Test examples carry the placeholder label 0.
    formattedTestData = [(0, presenceVector(doc[2])) for doc in testData]

    judgements = svmlight.classify(model, formattedTestData)

    return [
        ("POS" if judgements[i] > 0 else "NEG", sentiment, fileName)
        for i, (sentiment, fileName, features) in enumerate(testData)
    ]
Ejemplo n.º 14
0
def my_cross_val_score(data_fold, train, c_p):
    """Return one score per fold for a linear ranking SVM with C=c_p.

    data_fold yields (x, y) pairs; both halves are expanded through
    collect_data_qid against ``train``. The model is fitted on the first
    half and scored with my_accus on the second.
    """
    def score_fold(fit_part, eval_part):
        data_x = collect_data_qid(fit_part, train)
        data_y = collect_data_qid(eval_part, train)
        model = SVC.learn(data_x, C=c_p, kernel='linear', type='ranking')
        return my_accus(data_y, SVC.classify(model, data_y))

    return [score_fold(x, y) for x, y in data_fold]
Ejemplo n.º 15
0
def trainAndTest(training, test):
    """Train on ``training`` and classify ``test``.

    Items expose dataTuple(), name and label. Returns
    zip(predictions, labels, names) for the test items.
    """
    # Names of the training items are never used, but might be someday.
    train_examples = [item.dataTuple() for item in training]

    test_names = [item.name for item in test]
    test_examples = [item.dataTuple() for item in test]
    test_labels = [item.label for item in test]

    trained = svmlight.learn(train_examples)
    predicted = svmlight.classify(trained, test_examples)
    return zip(predicted, test_labels, test_names)
Ejemplo n.º 16
0
def tsvm_test0():
    # data processing
    data, target = load_svmlight_file('dataset/following.scale')
    data, target = shuffle(data, target)
    target = binarize(target)[:,0]

    cutoff = int(round(data.shape[0] * 0.8))

    train_data = data[:cutoff]
    train_target = target[:cutoff]

    transductive_train_data = data
    transductive_target = target.copy()
    transductive_target[cutoff:] = 0

    test_data = data[cutoff:]
    test_target = target[cutoff:]

    # convert the data into svmlight format
    svm_train_data = npToSVMLightFormat(train_data, train_target)
    svm_transductive_train_data = npToSVMLightFormat(transductive_train_data,
            transductive_target)
    svm_test_data = npToSVMLightFormat(test_data, test_target)

    print 'labels in the training data'
    print countLabels(svm_transductive_train_data).most_common()

    # svmlight routine
    model = svmlight.learn(svm_train_data,
            j=3.0, kernel='linear', type='classification', verbosity=0)
    trans_model = svmlight.learn(svm_transductive_train_data,
            j=3.0, kernel='linear', type='classification', verbosity=0)

    predictions = svmlight.classify(model, svm_test_data)
    trans_predictions = svmlight.classify(trans_model, svm_test_data)

    print 'inductive learning'
    print accuracy(predictions, test_target)
    print '(recall, precision)', recall_precision(predictions, test_target)

    print 'transductive learning'
    print accuracy(trans_predictions, test_target)
    print '(recall, precision)', recall_precision(trans_predictions, test_target)
Ejemplo n.º 17
0
    def predict(self, X):
        """Classify the rows of X, retraining self.model on every call.

        When self.class_dist is set, the model is learned from the training
        data plus the rows of X (carrying placeholder label 0), with
        self.class_dist[1] passed as transduction_posratio; otherwise it is
        learned from self.train_data alone. Returns a numpy array holding 1
        where the margin is > 0 and 0 where it is <= 0.
        """
        placeholder_labels = np.zeros(X.shape[0]).tolist()
        test_data = self.toSvmlight(X, placeholder_labels)
        all_data = self.train_data + test_data

        if not self.class_dist:
            self.model = svmlight.learn(self.train_data)
        else:
            pos_ratio = self.class_dist[1]
            self.model = svmlight.learn(all_data, verbosity=1, transduction_posratio=pos_ratio)

        predictions = np.array(svmlight.classify(self.model, test_data))
        # Threshold the margins in place.
        is_positive = predictions > 0
        is_non_positive = predictions <= 0
        predictions[is_positive] = 1
        predictions[is_non_positive] = 0

        return predictions
Ejemplo n.º 18
0
    def train_svm(self, docs):
        """Train an SVM on doc2vec vectors inferred for each document.

        docs is a list of (pos/neg, filename, wordlist); "POS" maps to label 1
        and anything else to -1. Vector components become 1-based features.
        The trained model is stored in self.svm_model.
        """
        def to_example(doc):
            vector = self.doc2vec_model.infer_vector(doc[2])
            label = 1 if doc[0] == "POS" else -1
            return (label, list(enumerate(vector, 1)))

        self.svm_model = learn([to_example(doc) for doc in docs])
        print("*** SVM TRAINED ***")
Ejemplo n.º 19
0
def trainAndTest(training, test):
    """Train on ``training`` and classify ``test``.

    Items are indexable records of the form (name, label, features).
    Returns zip(predictions, labels, names) for the test items.
    """
    # Names of the training items are never used, but might be someday.
    train_examples = [(record[1], record[2]) for record in training]

    test_names = [record[0] for record in test]
    test_examples = [(record[1], record[2]) for record in test]
    test_labels = [record[1] for record in test]

    trained = svm.learn(train_examples)
    predicted = svm.classify(trained, test_examples)
    return zip(predicted, test_labels, test_names)
Ejemplo n.º 20
0
def five_fold_validation(training_sets, validation_sets, c_value):
    """Return the mean validation accuracy over all folds for C=c_value.

    Fold i trains a classifier on training_sets[i] and scores it on
    validation_sets[i]; the first element of find_accuracy's result is
    averaged. Raises ZeroDivisionError when training_sets is empty.
    """
    accuracy_sum = 0.0
    for fold, training_set in enumerate(training_sets):
        model = svmlight.learn(training_set, type='classification', C=c_value)
        validation_set = validation_sets[fold]
        margins = svmlight.classify(model, validation_set)
        predictions = change_to_binary_predictions(margins)
        accuracy_sum += find_accuracy(validation_set, predictions)[0]

    return accuracy_sum / len(training_sets)
Ejemplo n.º 21
0
    def train(self):
        """Learn model weights from training instances.

        Trains a ranking SVM on self._training_data, stores it in
        self._svmmodel, and writes it to a temporary file from which
        self._recover_weights reads the weights. The temporary file is
        removed even when writing or weight recovery fails.
        """

        # Train using svmlight
        self._svmmodel = svmlight.learn(self._training_data, type='ranking')

        # Reserve a temp path and release our own handle before svmlight
        # writes to that path by name.
        modelout = NamedTemporaryFile(delete=False)
        modelout.close()
        try:
            # Write svmlight output to the temp file and recover weights
            svmlight.write_model(self._svmmodel, modelout.name)
            self._recover_weights(modelout.name)
        finally:
            # delete=False means cleanup is our responsibility.
            remove(modelout.name)
Ejemplo n.º 22
0
    def train(self, pos_word_lists, neg_word_lists):
        """Train an SVM on word (and optionally bigram) counts.

        pos_word_lists / neg_word_lists -- one word list per positive /
        negative document. Depending on self.stemming, self.bigrams and
        self.unigrams the words are Porter-stemmed and replaced by, or
        extended with, adjacent-word pairs. Every distinct feature gets an id
        taken from self.curr_id; when self.presence is set each feature counts
        once per document. The trained model is stored in self.model.
        """
        if self.stemming:
            stem = PorterStemmer().stem
            pos_word_lists = [[stem(word) for word in words]
                              for words in pos_word_lists]
            neg_word_lists = [[stem(word) for word in words]
                              for words in neg_word_lists]

        if self.bigrams:
            if self.unigrams:
                # Bigrams followed by the original unigrams.
                def expand(docwords):
                    return zip(docwords, docwords[1:]) + docwords
            else:
                def expand(docwords):
                    return zip(docwords, docwords[1:])
            neg_word_lists = [expand(docwords) for docwords in neg_word_lists]
            pos_word_lists = [expand(docwords) for docwords in pos_word_lists]

        self.word_ids = {}
        for token in set(flatten(pos_word_lists) + flatten(neg_word_lists)):
            self.word_ids[token] = self.curr_id
            self.curr_id += 1

        features = []
        for label, word_lists in ((1, pos_word_lists), (-1, neg_word_lists)):
            for wordlist in word_lists:
                # Presence mode collapses repeated features to a single count.
                tokens = set(wordlist) if self.presence else wordlist
                counts = Counter(tokens)
                featureVec = sorted(
                    ((self.word_ids.get(word), v)
                     for word, v in counts.iteritems()),
                    key=lambda pair: pair[0])
                features.append((label, featureVec))
        self.model = svmlight.learn(features)
Ejemplo n.º 23
0
def my_cross_val_score(data_fold, train, c_p):
    """Score a linear ranking SVM (C=c_p) on every fold of data_fold.

    Each fold is an (x, y) pair; both parts are expanded with
    collect_data_qid against ``train``. Returns the list of my_accus scores,
    one per fold, in fold order.
    """
    learn_options = {'C': c_p, 'kernel': 'linear', 'type': 'ranking'}
    fold_scores = []
    for fit_part, eval_part in data_fold:
        fit_data = collect_data_qid(fit_part, train)
        eval_data = collect_data_qid(eval_part, train)
        ranker = SVC.learn(fit_data, **learn_options)
        predicted = SVC.classify(ranker, eval_data)
        fold_scores.append(my_accus(eval_data, predicted))
    return fold_scores
Ejemplo n.º 24
0
def trainall():
    """
    Train an SVM on the samples for each of the ten digits 0-9.

    For every digit ``i`` a classification model is learned from
    ``totrain(i)`` and written to the path ``model/<i>``.

    :return:
    """
    for i in range(10):
        print "training ", i
        # totrain(i) supplies the training examples for digit i.
        training_data = totrain(i)
        model = svmlight.learn(training_data, type="classification", verbosity=0)
        # One model file per digit, named after the digit itself.
        model_name = "model/" + str(i)
        svmlight.write_model(model, model_name)  # write model

        """
Ejemplo n.º 25
0
def svm(training_set, test_set):
    """Train an SVM on training_set and label every document in test_set.

    Both sets hold (sentiment, file_name, features) records; "POS" maps to
    label 1 and anything else to -1. Features are encoded by presence when
    options["usePresence"] is set and by frequency otherwise; the same
    encoding is applied to training and test documents so the model is
    evaluated on the representation it was trained on.

    Returns a list of (predicted "POS"/"NEG", actual sentiment, file_name).
    """
    feature_indices = get_feature_indices(training_set + test_set)
    use_presence = options["usePresence"]

    def to_feature_vector(words):
        """Return (feature index, value) pairs sorted by feature index."""
        if use_presence:
            feature_freqs = dict.fromkeys(words, 1)
        else:
            feature_freqs = {}
            for w in words:
                feature_freqs[w] = feature_freqs.get(w, 0) + 1
        return sorted(
            ((feature_indices[word], count)
             for word, count in feature_freqs.items()),
            key=lambda x: x[0])

    formatted_training_set = [
        (1 if sentiment == "POS" else -1, to_feature_vector(features))
        for (sentiment, file_name, features) in training_set
    ]

    model = svmlight.learn(formatted_training_set)

    # Test examples carry the placeholder label 0.
    formatted_test_set = [
        (0, to_feature_vector(features))
        for (sentiment, file_name, features) in test_set
    ]

    predictions = svmlight.classify(model, formatted_test_set)

    return [
        ("POS" if predictions[idx] > 0 else "NEG", sentiment, file_name)
        for idx, (sentiment, file_name, features) in enumerate(test_set)
    ]
Ejemplo n.º 26
0
    def train(featuresets):
        """
        given a set of training instances in nltk format:
        [ ( {feature:value, ..}, str(label) ) ]
        train a support vector machine

        :param featuresets: training instances; iterated several times, so it
            must be a re-iterable collection rather than a one-shot iterator
        :return: an SvmClassifier wrapping the trained svmlight model
        :raises ValueError: unless exactly two distinct labels are present
        """

        _raise_if_svmlight_is_missing()

        # build a unique list of labels
        labels = set()
        for (features, label) in featuresets:
            labels.add(label)

        # this is a binary classifier only: exactly two labels are needed to
        # build the -1/+1 mapping below
        if len(labels) != 2:
            raise ValueError('Can only do boolean classification (labels: ' +
                             str(labels) + ')')

        # we need ordering, so a set's no good
        labels = list(labels)

        # next, assign -1 and 1
        labelmapping = {labels[0]: -1, labels[1]: 1}

        # now for feature conversion
        # iter through instances, building a set of feature:type:str(value) triples
        svmfeatures = set()
        for (features, label) in featuresets:
            for k, v in compat.iteritems(features):
                svmfeatures.add(featurename(k, v))
        # svmfeatures is indexable by integer svm feature number
        # svmfeatureindex is the inverse (svm feature name -> number)
        svmfeatures = list(svmfeatures)
        svmfeatureindex = dict(zip(svmfeatures, range(len(svmfeatures))))

        # build svm feature set case by case
        svmfeatureset = []
        for instance in featuresets:
            svmfeatureset.append(
                map_instance_to_svm(instance, labelmapping, svmfeatureindex))

        # train the svm
        # TODO: implement passing of SVMlight parameters from train() to learn()
        return SvmClassifier(
            labels, labelmapping, svmfeatures,
            svmlight.learn(svmfeatureset, type='classification'))
Ejemplo n.º 27
0
def train(fnames, topics):
  """Train a linear ranking SVM for the given files and topics.

  The training data from init_train_data is pretty-printed to the
  TRAINING_DATA file, the model is written to 'ef_model.dat', and
  ZC.dump_cache() is called afterwards.
  """
  examples = init_train_data(fnames, topics)
  print('[ train ] ===================')

  # Keep a readable dump of exactly what the model was trained on.
  with open(TRAINING_DATA, 'w') as dump_file:
    pprint.pprint(examples, dump_file)

  # train a model based on the data
  ranker = svmlight.learn(examples, type='ranking', kernel='linear', verbosity=0)

  # model data can be stored in the same format SVM-Light uses, for interoperability
  # with the binaries.
  svmlight.write_model(ranker, 'ef_model.dat')
  ZC.dump_cache()
Ejemplo n.º 28
0
def svm(train_docs, train_labels, test_docs, params):
    """Train an SVM with the requested kernel and predict 0/1 labels for test_docs.

    params is a (kernel, param) pair: 'rbf' uses param as rbf_gamma, while
    'poly' / 'polynomial' uses it as poly_degree; any other kernel raises
    ValueError. Training labels equal to 1 are positive and all others are
    negative. Returns a list with 1 where the margin is > 0 and 0 otherwise.
    """
    kernel, param = params
    signed_labels = (1 if l == 1 else -1 for l in train_labels)
    train_docs_svm = to_svmlight_format(train_docs, signed_labels)
    # Test examples carry placeholder labels of 0.
    test_docs_svm = to_svmlight_format(test_docs, np.zeros(test_docs.shape[0]))

    if kernel == 'rbf':
        kernel_options = {'kernel': 'rbf', 'rbf_gamma': param}
    elif kernel in ('poly', 'polynomial'):
        kernel_options = {'kernel': 'polynomial', 'poly_degree': param}
    else:
        raise ValueError('Unsupported svm parameters: ' + str(params))

    model = svmlight.learn(train_docs_svm,
                           type='classification',
                           **kernel_options)

    margins = svmlight.classify(model, test_docs_svm)
    return [1 if p > 0 else 0 for p in margins]
Ejemplo n.º 29
0
def SVM_experiment(data_fold, train, test, dumper):
    """Grid-search C, train a linear ranking SVM and write the test ranking.

    C is searched over 2**-15 .. 2**14 with my_GridSearchCV; the chosen value
    is reported through ``dumper.write``. The model trained on ``train`` is
    wrapped by mySVM, applied to ``test`` with ranking_test, and the result
    is written to the file 'svm.ranking' (UTF-8). Returns None.
    """
    param = {'C': [pow(2, i) for i in range(-15, 15)]}
    c_best = my_GridSearchCV(data_fold, train, param)

    dumper.write("Classifier: SVM\n")
    dumper.write('Best Parameters: %f' % (c_best))

    model = SVC.learn(train, C=c_best, kernel='linear', type='ranking')
    ret = mySVM(model)
    pred = ranking_test(ret, test)
    # Close the output file deterministically so the ranking is flushed to
    # disk and the handle is not leaked.
    with codecs.open('svm.ranking', 'w', 'utf-8') as ranking_file:
        output_ranking(pred, ranking_file)
    return None
Ejemplo n.º 30
0
def SVM_experiment(data_fold, train, test, dumper):
    """Grid-search C, train a linear ranking SVM and write the test ranking.

    C is searched over 2**-15 .. 2**14 with my_GridSearchCV; the chosen value
    is reported through ``dumper.write``. The model trained on ``train`` is
    wrapped by mySVM, applied to ``test`` with ranking_test, and the result
    is written to the file 'svm.ranking' (UTF-8). Returns None.
    """
    param = { 'C': [pow(2, i) for i in range(-15, 15)] }
    c_best = my_GridSearchCV(data_fold, train, param)

    dumper.write("Classifier: SVM\n")
    dumper.write('Best Parameters: %f'%(c_best))

    model = SVC.learn(train, C=c_best, kernel='linear', type='ranking')
    ret = mySVM(model)
    pred = ranking_test(ret, test)
    # Close the output file deterministically so the ranking is flushed to
    # disk and the handle is not leaked.
    with codecs.open('svm.ranking', 'w', 'utf-8') as ranking_file:
        output_ranking(pred, ranking_file)
    return None
Ejemplo n.º 31
0
def find_models(examples, c_value):
    '''
    Build one binary classification model per class found in the examples.

    The class of an example is its first element. For each distinct class the
    examples are relabelled with change_to_binary_examples and a model is
    trained with C=c_value. These models will be used to determine the
    likelihood that a test example comes from each class's source.
    Returns a dict mapping class number -> model.
    '''
    models = {}
    for example in examples:
        class_num = example[0]
        if class_num in models:
            # This class already has its model.
            continue
        train = change_to_binary_examples(examples, class_num)
        models[class_num] = svmlight.learn(train, type='classification', C=c_value)

    return models
Ejemplo n.º 32
0
def training_model(ind,n=3):
    print "Loading features"
    load_features(n,fmap)
    print "Feature map size: %s" % fmap.getSize()
    print "Getting training data"
    train = []
    for i in ind.get_pos_train_ind():
        item = os.listdir("pos")[i]
        train.append((1,[(fmap.getID(item[0]),item[1]) for item in ngrams.ngrams(n, open("pos/"+item).read()).items() if fmap.hasFeature(item[0])]))
    for i in ind.get_neg_train_ind():
        item = os.listdir("neg")[i]
        train.append((-1,[(fmap.getID(item[0]),item[1]) for item in ngrams.ngrams(n, open("neg/"+item).read()).items() if fmap.hasFeature(item[0])]))
    print "Training model"
    model = svmlight.learn(train, type='classification', verbosity=0)
    svmlight.write_model(model, 'my_model.dat')
    return model
Ejemplo n.º 33
0
def train(fnames, topics):
    """Build the training data, dump it, then learn and save a ranking model.

    The data returned by init_train_data(fnames, topics) is pretty-printed
    to the file named by TRAINING_DATA, used to train a linear ranking
    model that is written to 'ef_model.dat', and finally the ZC cache is
    dumped.
    """
    training_data = init_train_data(fnames, topics)
    print('[ train ] ===================')

    # keep a human-readable copy of exactly what the model was trained on
    with open(TRAINING_DATA, 'w') as dump_file:
        pprint.pprint(training_data, dump_file)

    ranking_model = svmlight.learn(
        training_data, type='ranking', kernel='linear', verbosity=0)

    # stored in the format SVM-Light itself uses, so the SVM-Light binaries
    # can work with the same file
    svmlight.write_model(ranking_model, 'ef_model.dat')
    ZC.dump_cache()
Ejemplo n.º 34
0
    def train(featuresets):
        """
        given a set of training instances in nltk format:
        [ ( {feature:value, ..}, str(label) ) ]
        train a support vector machine

        :param featuresets: training instances; iterated several times, so it
            must be re-iterable (e.g. a list, not a generator)
        :return: an SvmClassifier built from the labels, the label mapping,
            the svm feature list and the learned svmlight model
        :raise ValueError: unless exactly two distinct labels are present
        """

        _raise_if_svmlight_is_missing()

        # build a unique list of labels
        labels = set(label for (_, label) in featuresets)

        # this is a binary classifier only
        if len(labels) > 2:
            raise ValueError('Can only do boolean classification (labels: '+ str(labels) + ')')
        # With fewer than two labels the mapping below cannot be built; fail
        # with a clear message instead of an IndexError on labels[1].
        if len(labels) < 2:
            raise ValueError('Need two distinct labels for boolean classification (labels: '+ str(labels) + ')')

        # we need ordering, so a set's no good
        labels = list(labels)

        # next, assign -1 and 1
        labelmapping = {labels[0]:-1, labels[1]:1}

        # now for feature conversion
        # iter through instances, building a set of feature:type:str(value) triples
        svmfeatures = set()
        for (features, label) in featuresets:
            for k,v in compat.iteritems(features):
                svmfeatures.add(featurename(k, v))
        # svmfeatures is indexable by integer svm feature number
        # svmfeatureindex is the inverse (svm feature name -> number)
        svmfeatures = list(svmfeatures)
        svmfeatureindex = dict(zip(svmfeatures, range(len(svmfeatures))))

        # build svm feature set case by case
        svmfeatureset = [map_instance_to_svm(instance, labelmapping, svmfeatureindex)
                         for instance in featuresets]

        # train the svm
        # TODO: implement passing of SVMlight parameters from train() to learn()
        return SvmClassifier(labels, labelmapping, svmfeatures, svmlight.learn(svmfeatureset, type='classification'))
Ejemplo n.º 35
0
 def fit(self, train_x, train_y, unlabeled_x=None):
     """Fit one RBF-kernel svmlight model per class value 0 .. max(train_y).

     train_x - labelled feature matrix (its second dimension sets the
               default gamma when self.rbf_gamma is 0)
     train_y - class index of every row of train_x
     unlabeled_x - optional extra feature matrix whose rows are added to
                   every binary problem with label 0

     Stores the models, in class order, in self.models and sets
     self.fitted to True.
     """
     if self.rbf_gamma == 0:
         self.rbf_gamma = 1./train_x.shape[1]
     n_y = np.max(train_y)+1
     self.models = []
     feats = toSVMLightFeatures(train_x)
     # `is not None`: `!=` on an array compares element-wise and cannot be
     # used as a condition.  Without unlabeled data the list is simply empty
     # rather than undefined.
     if unlabeled_x is not None:
         feats_unlabeled = toSVMLightFeatures(unlabeled_x)
     else:
         feats_unlabeled = []
     for class_idx in range(n_y):
         # +1 for rows of the current class, -1 for all other rows
         train_y_binary = (train_y==class_idx)*2-1
         examples = []
         for row, feat in enumerate(feats):
             examples.append((train_y_binary[row], feat))
         # NOTE(review): label 0 presumably marks these rows as unlabeled
         # (transductive) examples for SVM-light - confirm.
         for feat in feats_unlabeled:
             examples.append((0, feat))
         _model = svmlight.learn(examples, type='classification', kernel='rbf', C=self.C, rbf_gamma=self.rbf_gamma)
         self.models.append(_model)
     self.fitted = True
Ejemplo n.º 36
0
    def train(self, label_values, converted=False):
        """Learn the classification model and keep it on the instance.

        Args:
            label_values: Iterable of (label, value) tuples, where value is
                a list-like object, e.g. [(label, value), ...]; or, when
                converted=True, the output of convert_label_values.
            converted: True when label_values is already in the internal
                format and must not be converted again.
        Returns:
            self
        """
        if converted:
            examples = label_values
        else:
            examples = self.convert_label_values(label_values)
        # materialise any other iterable into a list before learning
        if not isinstance(examples, list):
            examples = list(examples)
        self._m = svmlight.learn(examples, type='classification', verbosity=1)
        return self
Ejemplo n.º 37
0
 def __makeModel(self):
     """Train one one-vs-rest classification model per known label name.

     The labels are rebuilt from the image name list.  For every label
     name, each weight vector becomes a training example that is positive
     (1) when the part of its image name before the first "_" maps to that
     label and negative (-1) otherwise.  The (name, model) pairs are
     stored in self.__models.
     """
     self.labelList = Labels()
     self.labelList.makeAllLabels(self.__imgdata.namelist)
     self.__models = list()
     for name in self.labelList.getLabellist():
         label = self.labelList.name2label(name)
         traindata = list()
         for imageidx, imageweights in enumerate(self.__weights):
             facename = self.__imgdata.namelist[imageidx].partition("_")[0]
             facelabel = self.labelList.name2label(facename)
             example = 1 if facelabel == label else -1
             traindata.append((example, self.__makeWeightTuplesList(imageweights)))
         temp_model = svmlight.learn(traindata, type='classification', verbosity=3)
         self.__models.append((name, temp_model))
Ejemplo n.º 38
0
def test_svmlight():
    """Train on a ten-example toy set, save the model and print test scores.

    Four examples are labelled 1 and six are labelled -1.  The model is
    written to 'my_model.dat' and the value predicted for each of the two
    test examples is printed with eight decimals.
    """
    positives = [
        [(1, 2), (2, 5), (3, 6), (5, 1), (4, 2), (6, 1)],
        [(1, 2), (2, 1), (3, 4), (5, 3), (4, 1), (6, 1)],
        [(1, 2), (2, 2), (3, 4), (5, 1), (4, 1), (6, 1)],
        [(1, 2), (2, 1), (3, 3), (5, 1), (4, 1), (6, 1)],
    ]
    negatives = [
        [(1, 2), (2, 1), (3, 1), (5, 3), (4, 2), (6, 1)],
        [(1, 1), (2, 1), (3, 1), (5, 3), (4, 1), (6, 1)],
        [(1, 1), (2, 2), (3, 1), (5, 3), (4, 1), (6, 1)],
        [(1, 1), (2, 1), (3, 1), (5, 1), (4, 3), (6, 1)],
        [(1, 2), (2, 1), (3, 1), (5, 2), (4, 1), (6, 5)],
        [(7, 10)],
    ]
    training_data = ([(1, feats) for feats in positives] +
                     [(-1, feats) for feats in negatives])

    # test examples carry the placeholder label 0
    test_data = [(0, feats) for feats in (
        [(1, 2), (2, 6), (3, 4), (5, 1), (4, 1), (6, 1)],
        [(1, 2), (2, 6), (3, 4)],
    )]

    model = svmlight.learn(training_data, type='classification', verbosity=0)
    svmlight.write_model(model, 'my_model.dat')
    for score in svmlight.classify(model, test_data):
        print('%.8f' % score)
Ejemplo n.º 39
0
def test_svmlight():
    """Smoke-test svmlight: learn from toy data, save the model, print scores.

    The first four training vectors are labelled 1, the remaining six -1.
    The model goes to 'my_model.dat'; one score per test example is printed
    with eight decimals.
    """
    vectors = [
        [(1,2),(2,5),(3,6),(5,1),(4,2),(6,1)],
        [(1,2),(2,1),(3,4),(5,3),(4,1),(6,1)],
        [(1,2),(2,2),(3,4),(5,1),(4,1),(6,1)],
        [(1,2),(2,1),(3,3),(5,1),(4,1),(6,1)],
        [(1,2),(2,1),(3,1),(5,3),(4,2),(6,1)],
        [(1,1),(2,1),(3,1),(5,3),(4,1),(6,1)],
        [(1,1),(2,2),(3,1),(5,3),(4,1),(6,1)],
        [(1,1),(2,1),(3,1),(5,1),(4,3),(6,1)],
        [(1,2),(2,1),(3,1),(5,2),(4,1),(6,5)],
        [(7,10)],
    ]
    labels = [1] * 4 + [-1] * 6
    training_data = list(zip(labels, vectors))

    # label 0 is only a placeholder on the test examples
    test_data = [
        (0, [(1,2),(2,6),(3,4),(5,1),(4,1),(6,1)]),
        (0, [(1,2),(2,6),(3,4)]),
    ]

    model = svmlight.learn(training_data, type='classification', verbosity=0)
    svmlight.write_model(model, 'my_model.dat')
    predictions = svmlight.classify(model, test_data)
    for prediction in predictions:
        print('%.8f' % prediction)
Ejemplo n.º 40
0
def performDoc2VecJudgement(trainingData, testData):
    """Classify test documents with an SVM trained on Doc2Vec vectors.

    Every document is indexed as doc[0] = sentiment, doc[2] = the input
    given to infer_vector; test documents unpack as
    (sentiment, fileName, features).  Training documents whose sentiment is
    'POS' get label 1, all others -1; test documents get the placeholder
    label 0.

    Returns a list of (judgement, sentiment, fileName) tuples, one per test
    document and in the same order.
    """
    doc2vecModel = Doc2Vec.load("/Users/Matteo/Desktop/doc2vec_models/final_model")

    def asFeatureList(vector):
        # feature numbers start at 1
        return [(position + 1, value) for position, value in enumerate(vector)]

    trainingFeatureVectors = []
    for doc in trainingData:
        label = 1 if doc[0] == 'POS' else -1
        trainingFeatureVectors.append((label, doc2vecModel.infer_vector(doc[2])))
    testFeatureVectors = []
    for doc in testData:
        testFeatureVectors.append((0, doc2vecModel.infer_vector(doc[2])))

    formattedTrainingFeatureVectors = [
        (label, asFeatureList(vector)) for label, vector in trainingFeatureVectors]
    formattedTestFeatureVectors = [
        (label, asFeatureList(vector)) for label, vector in testFeatureVectors]

    svmModel = svmlight.learn(formattedTrainingFeatureVectors)
    judgements = svmlight.classify(svmModel, formattedTestFeatureVectors)

    return [(judgements[position], sentiment, fileName)
            for position, (sentiment, fileName, features) in enumerate(testData)]
Ejemplo n.º 41
0
 def fit(self, train_x, train_y, unlabeled_x=None):
     """Train one RBF-kernel svmlight classifier per class (one-vs-rest).

     train_x - labelled feature matrix; when self.rbf_gamma is 0 the gamma
               defaults to 1 / train_x.shape[1]
     train_y - class index of every row of train_x; classes 0 .. max are
               trained
     unlabeled_x - optional feature matrix appended to every binary
                   training set with label 0

     The models are stored in class order in self.models and self.fitted
     is set to True.
     """
     if self.rbf_gamma == 0:
         self.rbf_gamma = 1. / train_x.shape[1]
     n_y = np.max(train_y) + 1
     self.models = []
     feats = toSVMLightFeatures(train_x)
     # Test identity with None (an array compared with `!=` yields an
     # array, not a bool) and fall back to "no unlabeled rows" so the loop
     # below never touches an undefined name.
     feats_unlabeled = []
     if unlabeled_x is not None:
         feats_unlabeled = toSVMLightFeatures(unlabeled_x)
     for class_idx in range(n_y):
         # current class -> +1, every other class -> -1
         train_y_binary = (train_y == class_idx) * 2 - 1
         examples = [(train_y_binary[row], feat)
                     for row, feat in enumerate(feats)]
         # NOTE(review): label 0 presumably tells SVM-light these rows are
         # unlabeled (transductive learning) - confirm.
         examples.extend((0, feat) for feat in feats_unlabeled)
         _model = svmlight.learn(examples,
                                 type='classification',
                                 kernel='rbf',
                                 C=self.C,
                                 rbf_gamma=self.rbf_gamma)
         self.models.append(_model)
     self.fitted = True
    def __makeModel(self):
        """Build a binary classification model for every label name.

        After regenerating the label list from the image names, each label
        name gets its own training set: a weight vector is a positive
        example (1) when the prefix of its image name (text before the
        first "_") maps to that label, and a negative one (-1) otherwise.
        self.__models ends up as a list of (name, model) tuples.
        """
        self.labelList = Labels()
        self.labelList.makeAllLabels(self.__imgdata.namelist)
        self.__models = list()
        for name in self.labelList.getLabellist():
            label = self.labelList.name2label(name)
            traindata = list()
            for imageidx, imageweights in enumerate(self.__weights):
                imagename = self.__imgdata.namelist[imageidx]
                facelabel = self.labelList.name2label(
                    imagename.partition("_")[0])
                if facelabel != label:
                    example = -1
                else:
                    example = 1
                weighttuples = self.__makeWeightTuplesList(imageweights)
                traindata.append((example, weighttuples))
            temp_model = svmlight.learn(
                traindata, type='classification', verbosity=3)
            self.__models.append((name, temp_model))
Ejemplo n.º 43
0
 def fit(self, X, y, unlabeled_data=None):
   """Fit one svm classification model per binarized label column.

   X - labelled feature matrix (dense array or scipy sparse matrix)
   y - labels of the rows of X, binarized with LabelBinarizer (+1 / -1)
   unlabeled_data - optional matrix of unlabeled rows; they are stacked
       below X and get target 0 in every column.  None means no unlabeled
       rows.

   Sets self._label_binarizer, self.num_classes_ and self.model_ (one
   model per column, in column order).
   """
   num_labeled = X.shape[0]
   # The default None used to fail on `.shape`; treat it as an empty
   # unlabeled set so the documented default is usable.
   if unlabeled_data is None:
     num_unlabeled = 0
   else:
     num_unlabeled = unlabeled_data.shape[0]
     if issparse(X):
       X = vstack((X, unlabeled_data), format='csr')
     else:
       X = np.concatenate((X, unlabeled_data))
   num_data = num_labeled + num_unlabeled
   self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
   Y_labeled = self._label_binarizer.fit_transform(y)
   self.num_classes_ = Y_labeled.shape[1]
   # Rows past the labelled ones stay at 0, the target of unlabeled data.
   Y = np.zeros((num_data, self.num_classes_), dtype=np.float32)
   Y[:num_labeled] = Y_labeled
   self.model_ = []
   for i in range(self.num_classes_):
     y_column = Y[:, i]
     self.model_.append(
         svm.learn(self.__data2docs(X, y_column),
                   type='classification'.encode()))
Ejemplo n.º 44
0
def main_svmlight():
    """Train a regression model on synthetic data and print test predictions.

    Two data sets are generated with syntheticData(30, 1).  A model is
    learned from the first one and saved to 'my_model.dat'; the second one
    is then classified and every prediction is printed next to the value
    stored as first item of the corresponding test example.  The 'HERE n'
    lines trace progress.
    """
    # copied:
    import svmlight

    training_data = syntheticData(30, 1)
    test_data = syntheticData(30, 1)

    print('HERE 0')
    print('training_data is %s' % (training_data,))
    print('test_data is %s' % (test_data,))

    # train a model based on the data
    print('HERE 1')
    # NOTE(review): kernelType=2 is passed through unchanged; confirm the
    # installed svmlight binding accepts this keyword.
    model = svmlight.learn(training_data,
                           type='regression',
                           kernelType=2,
                           verbosity=3)
    print('HERE 2')

    # model data can be stored in the same format SVM-Light uses, for interoperability
    # with the binaries.
    svmlight.write_model(model, 'my_model.dat')
    print('HERE 3')

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.  The predictions must come from test_data because
    # they are paired with the test examples below.
    predictions = svmlight.classify(model, test_data)
    print('HERE 4')

    for p, example in zip(predictions, test_data):
        print('pred %.8f, actual %.8f' % (p, example[0]))
			nskipped = 1
			if len(sentences) > 1:  # because there have to be transitions
				docModel = DummyDocModel(sentences)
				grid = TextrazorEntityGrid(docModel.cleanSentences(), 1, textrazorEntities, textrazorSentences)
				if grid.valid and len(grid.matrixIndices) > 0:
					grid.printMatrix()
					featureVector = FeatureVector(grid, clusterIndex)
					featureVector.printVector()
					featureVector.printVectorWithIndices()
					vector = featureVector.getVector(qualityScore)
					featureVectors.append(vector)
					docIndex += 1

			else:
				print "SKIPPING (not enough sentences) %s, nskipped=(%d)" % (fileName, nskipped)
				nskipped += 1
		else:
			print "SKIPPING (no pickle file)%s, nskipped=(%d)" % (fileName, nskipped)
			nskipped += 1

		# pickleFile = open("../cache/svmlightCache/featureVectors.pickle", 'wb')
		# pickle.dump(featureVectors, pickleFile, pickle.HIGHEST_PROTOCOL)
		# pickleFile.close()
		# if docIndex >= maxN:
		#	break
		numDocsTried += 1
	clusterIndex += 1
# Now train a ranking model on the collected feature vectors and write the
# learned model to the svmlight cache directory.
model = svmlight.learn(featureVectors, type='ranking', verbosity=0)
svmlight.write_model(model, '../cache/svmlightCache/svmlightModel.dat')
Ejemplo n.º 46
0
def svm():
	"""Train and run an SVM on per-review (posScore, negScore) features.

	Loads the (word,pos) -> (posScore,negScore) dictionary from
	'sentiment_score.pickle' and the (review,sentiment) pair list from
	'sent_400_wspos.pickle'.  The word scores of every review are summed
	(the two scores are swapped for words that negate.negating marks with
	'NOT'), the first three quarters of each sentiment group are used for
	training and the rest for testing, and the learned model is written
	to 'my_model1.dat'.  Returns None.
	"""
	# `with` closes the pickle files as soon as they have been read
	with open('sentiment_score.pickle','rb') as scoreFile:
		synDict = pickle.load(scoreFile)
	with open('sent_400_wspos.pickle','rb') as annotFile:
		annot = pickle.load(annotFile)

	print(annot)

	# 0 -> pos, 1 -> neg, 2 -> both, 3 -> neut
	data = {'pos':[],'neg':[],'both':[],'neut':[]}
	strToNum = {'pos':0,'neg':1,'both':2,'neut':3}

	for line,sent in annot:
		tokens = line.split()
		score = (0,0)

		# negation detection runs on the bare words (text before the first '#')
		string = ' '.join(word.split('#')[0] for word in tokens)
		neg = negate.negating(string.strip(' '))

		# catch empty case, simpler than re-pickling
		if neg == []:
			continue

		# calculate (posScore, negScore) for each word in line
		for i,word in enumerate(tokens):
			tri = word.split('#')
			tempscore = (0,0)
			if len(tri) == 3:
				pair = (tri[0]+'#'+tri[2],tri[1])
				tempscore = synDict.get(pair,(0,0))

				if 'NOT' in neg[i]:
					tempscore = (tempscore[1],tempscore[0]) # set to reverse value b/c inverted meaning

			score = (score[0]+tempscore[0],score[1]+tempscore[1]) # add tempscore to score

		data[sent.strip(' ')].append(score)

	# convert to feature lists, one list per sentiment group
	featList = []
	for key in data.keys():
		featList.append([(strToNum[key],[(1,a),(2,b)]) for a,b in data[key]])

	# construct test and train sets as fractions of featList:
	# first 3/4 of every group for training, the remainder for testing
	train = []
	test = []
	for feats in featList:
		cut = 3*len(feats)//4
		train += feats[:cut]
		test += feats[cut:]

	for element in train:
		print(element)

	# train and test model
	# NOTE(review): the labels are 0..3 while type='classification' is
	# requested - confirm this is what svmlight is expected to receive.
	model = svmlight.learn(train, type='classification', verbosity=0)
	svmlight.write_model(model, 'my_model1.dat')
	# the predictions are computed but not reported yet
	predictions = svmlight.classify(model, test)
Ejemplo n.º 47
0
def run_svm(article_count,
            feature_functions,
            kernel='polynomial',
            split=0.9,
            model_path='svm.model'):
    """Train and evaluate an svmlight DEF / INDEF token classifier.

    Articles come from preprocess_wsj(article_count, feature_functions).
    Every token tagged 'DEF' (label +1) or 'INDEF' (label -1) becomes one
    example whose features are the sorted, de-duplicated ids returned by
    the dictionary, each with value 1.  The examples are shuffled and
    split with bifurcate, a model is trained with the given kernel,
    written to model_path and evaluated on the test part by comparing the
    sign of each prediction with the sign of the gold label.

    Returns a dict with total_articles_count, total_token_count,
    train_count, test_count, kernel, correct, wrong and total.

    Uses pysvmlight: https://bitbucket.org/wcauchois/pysvmlight
    """
    articles, total_token_count = preprocess_wsj(article_count,
                                                 feature_functions)

    dictionary = Dictionary()
    dictionary.add_one('ZZZZZ')  # so that no features are labeled 0
    data = []
    for article in articles:
        for sentence in article:
            for tag, token_features in zip(sentence.def_tags, sentence.data):
                # only use def / indef tokens
                if tag not in ('DEF', 'INDEF'):
                    continue
                feature_ids = sorted(set(dictionary.add(token_features)))
                feature_values = zip(feature_ids, [1] * len(feature_ids))
                label = +1 if tag == 'DEF' else -1
                data.append((label, feature_values))

    train, test = bifurcate(data, split, shuffle=True)

    # svmlight.learn options
    #   type: 'classification', 'regression', 'ranking' (preference
    #         ranking) or 'optimization'
    #   kernel: 'linear', 'polynomial', 'rbf' or 'sigmoid'
    #   verbosity: verbosity level (default 0)
    #   C: trade-off between training error and margin
    #   poly_degree: parameter d in polynomial kernel
    #   rbf_gamma: parameter gamma in rbf kernel
    #   coef_lin
    #   coef_const
    #   costratio (corresponds to -j option to svm_learn)
    model = svmlight.learn(train, type='classification', kernel=kernel)
    svmlight.write_model(model, model_path)

    gold_labels, test_feature_values = zip(*test)

    # the labels handed to classify are placeholders
    test_pairs = [(0, feature_values)
                  for feature_values in test_feature_values]
    predictions = svmlight.classify(model, test_pairs)

    gold_is_def = [(gold > 0) for gold in gold_labels]
    predicted_is_def = [(prediction > 0) for prediction in predictions]
    correct, wrong = matches(gold_is_def, predicted_is_def)

    return {
        'total_articles_count': len(articles),  # int
        'total_token_count': total_token_count,  # int
        'train_count': len(train),  # int
        'test_count': len(test),  # int
        'kernel': kernel,
        'correct': correct,
        'wrong': wrong,
        'total': correct + wrong,
    }
Ejemplo n.º 48
0
 def fit(self, data, target):
     """Train a linear svmlight classifier and keep it in self.model.

     data and target are converted to svmlight training examples with
     npToSVMLightFormat before learning.
     """
     examples = npToSVMLightFormat(data, target)
     self.model = svmlight.learn(
         examples, type='classification', kernel='linear', j=1.0, verbosity=0)
Ejemplo n.º 49
0
    for d, line in enumerate(f):
        line = line.strip().split(',')
        words = [int(x.split(':')[0]) + 1 for x in line[1:]]
        sort = np.argsort(words)
        counts = [int(x.split(':')[1]) for x in line[1:]]
        text.append(zip(np.array(words)[sort], np.array(counts)[sort]))
text = np.array(text)


def save_classifier(clf, clf_i):
    """Write model clf to <args.output_directory>/<clf_i>.

    The output directory is created first when it does not exist yet.
    """
    out_dir = args.output_directory
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    model_path = os.path.join(out_dir, str(clf_i))
    svmlight.write_model(clf, model_path)


#########################################
## Train SVM classifiers

classifiers = {}
# Train and save one binary classifier for every non-root node i that has
# documents of its own.
for i in range(I):
    if i == root or len(inv_codes[i]) == 0:
        continue
    print('%s of %s' % (i, I))
    # +1 for the documents listed under node i, -1 for all the others.
    # The builtin int replaces np.int, a deprecated alias of it that newer
    # NumPy releases no longer provide.
    clf_child = -np.ones(D, int)
    clf_child[np.array(inv_codes[i])] = 1
    # Only the documents listed under CtoP[i] take part in the training.
    clf_text = text[np.array(inv_codes[CtoP[i]])]
    clf_code = clf_child[np.array(inv_codes[CtoP[i]])]
    training = zip(clf_code, clf_text)
    clf = svmlight.learn(training, type='classification')
    save_classifier(clf, i)
Ejemplo n.º 50
0
	def train_binary(self, x, y):
		"""Learn an RBF-kernel classifier from (x, y) and save it.

		The data is converted with svmlfeaturisexy, the model is trained
		with self.C and self.gamma and written to 'tsvm_mnist.dat'.
		"""
		examples = svmlfeaturisexy(x, y)
		model = svmlight.learn(
			examples,
			type='classification',
			verbosity=0,
			kernel='rbf',
			C=self.C,
			rbf_gamma=self.gamma)
		svmlight.write_model(model, 'tsvm_mnist.dat')
    return prepared_data


# 10-fold loop: for every fold, train a classification model on all the
# other folds.  The evaluation on the held-out fold is currently commented
# out, so only the training part and the statistics below actually run.
folds = get_n_folds(10)
for i, test_fold in enumerate(folds):
    print("Fold: " + str(i))
    training = merge_all_folds_except(i, folds)
    # the feature mapping is derived from the training folds only
    features_mapping = get_features(training)

    svm_train = prepare_data_for_svm(training, features_mapping)
    print("Train Ratio = " + str(get_neg_proportion(training)))
    print("Test Ratio = " + str(get_neg_proportion(test_fold)))
    print(len(training))
    print(len(test_fold))

    model = svmlight.learn(svm_train, type='classification')

    # the held-out fold is converted with the mapping built from the
    # training folds
    svm_test = prepare_data_for_svm(test_fold, features_mapping)
    # same examples with the label replaced by the placeholder 0
    svm_test_with_unknown_class = [(0, features) for _, features in svm_test]
    # predictions = svmlight.classify(model, svm_test_with_unknown_class)
    pos_count = 0
    # for i,p in enumerate(predictions):
    #     truth = svm_test[i][0]
    #     if truth*p > 0:
    #         print("Correct: %.8f" % p)
    #         pos_count += 1
    #     else :
    #         print("Incorre: %.8f" % p)
    # print(pos_count)

# model data can be stored in the same format SVM-Light uses, for
Ejemplo n.º 52
0
 def learnModel(self, X, y):
     """Learn a ranking model from X and y and store it in self.model.

     The training examples are built by self.__createData; kernel, C and
     gamma are taken from the instance.
     """
     self.model = svmlight.learn(
         self.__createData(X, y),
         type='ranking',
         verbosity=0,
         kernel=self.kernel,
         C=self.C,
         gamma=self.gamma)
Ejemplo n.º 53
0
        val = int(float(counter) / len(filenames) * 100)
        if val in percentages and percentages[val]:
            print " Progress: %i %s" % (val, "%")
            percentages[val] = False

        try:
            source = open(directory + filename, 'r')
            train_type = int(source.readline())
            train_num_dimensions = int(source.readline())
            train_dimensions = source.readline().strip().split()
            source.close()

            num = 1
            vals = []
            for val in train_dimensions:
                vals.append((num, float(val)))
                num += 1

            training_data.append((train_type, vals))

        except Exception as e:
            print "ERROR:", e
            break
        counter += 1

# Report how many training examples were imported, then learn a
# classification model from them and write it to 'svm-model.dat'.
print "Imported:", len(training_data), "\n"
print "Building Model"
model = svmlight.learn(training_data, type='classification', verbosity=0)
print "Write Model"
svmlight.write_model(model, 'svm-model.dat')
Ejemplo n.º 54
0
        elif not standard:
            data_test = np.array(list(data_test))
            nsamples, nx = data_test.shape
            data_test = data_test.reshape((nsamples, nx))

        dump_svmlight_file(data_test, target_test, 'aux/test_' + ts + '.txt')

        train = svm_parse('aux/train_' + ts + '.txt')
        aux = svm_parse('aux/test_' + ts + '.txt')
        test, val = adapt_to_svmlight_format(aux)

        print("Training it=", it, "cost-factor=", cost_factor + 1)

        model = svmlight.learn(list(train),
                               type='classification',
                               verbosity=0,
                               costratio=cost_factor +
                               1)  ## costratio = cost-factor

        if dump == "yes":
            svmlight.write_model(
                model,
                "models/model_" + dataset + "_" + features + "_it" + str(it) +
                "_cost_fact" + str(cost_factor + 1) + "_" + ts + ".dat")

        predictions = svmlight.classify(model, test)
        print("Predicting it=", it, "cost-factor=", cost_factor + 1)

        tp, tn, fp, fn = evaluate(predictions)
        accuracies.append(
            weighted_accuracy(cost_factor + 1, tn, tp, fn, fp) * 100)