Esempio n. 1
0
    def predict_proba(self, data):
        """Return the model's confidence that each row is in the positive class.

        The rows of ``data`` are converted with ``npToSVMLightFormat`` using
        an all-zero placeholder target, classified with ``self.model``, and
        the values from ``svmlight.classify`` are returned as a NumPy array.
        """
        placeholder_target = np.zeros(data.shape[0])
        examples = npToSVMLightFormat(data, placeholder_target)
        return np.array(svmlight.classify(self.model, examples))
Esempio n. 2
0
def ball_only_classifier(circles, color_image, bonus_radius):
    """Pick the candidate circle that the ball model scores highest.

    Only the first six entries of ``circles`` are examined.  Each one is
    turned into a feature line, classified, and kept if its score beats the
    best score seen so far (starting from a threshold of 0.5).

    Returns:
        A tuple ``(best_circle, best_classification, best_circle_pixels)``.
        ``best_circle`` is a one-element list holding the winning circle, or
        ``None`` when no circle scored above 0.5.  ``best_classification`` is
        the winning score as a plain number (0.5 when nothing won).
        ``best_circle_pixels`` is the pixel data of the winner, or ``None``.
    """
    model = svmlight.read_model("./output/best_single_cup_model_for_ball")
    ff = find_features()
    # TODO: fix
    label = 0
    best_classification = 0.5
    best_circle = None
    best_circle_pixels = None
    for c in circles[:6]:
        pixels, circle = find_pixels(c, color_image, bonus_radius)
        # create features for that circle
        features = parse_one_line(ff.generate_features(pixels, label))
        print(features)
        # run the classifier on that circle
        classification = svmlight.classify(model, [features])
        print(classification)
        # classify() returns a list; keep the scalar score so that later
        # candidates are compared number-to-number.  Storing the list itself
        # made every subsequent comparison float-vs-list, which froze the
        # first hit as the winner.
        score = classification[0]
        if score > best_classification:
            best_classification = score
            best_circle = [c]
            best_circle_pixels = pixels

    return best_circle, best_classification, best_circle_pixels
Esempio n. 3
0
def main_svmlight():
    # copied:
    import svmlight
    import pdb
    
    training_data = syntheticData(30, 1)
    test_data     = syntheticData(30, 1)
    #training_data = __import__('data').train0
    #test_data = __import__('data').test0

    print 'HERE 0'
    print 'training_data is', training_data
    print 'test_data is', test_data

    # train a model based on the data
    #pdb.set_trace()
    print 'HERE 1'
    model = svmlight.learn(training_data, type='regression', kernelType=2, verbosity=3)
    print 'HERE 2'

    # model data can be stored in the same format SVM-Light uses, for interoperability
    # with the binaries.
    svmlight.write_model(model, 'my_model.dat')
    print 'HERE 3'

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.
    #predictions = svmlight.classify(model, test_data)
    pdb.set_trace()
    predictions = svmlight.classify(model, training_data)
    print 'HERE 4'
    
    for p,example in zip(predictions, test_data):
        print 'pred %.8f, actual %.8f' % (p, example[0])
Esempio n. 4
0
    def predict(self, peptides,  **kwargs):
        """Score peptides with the length-specific SVM models of this predictor.

        :param peptides: a single ``Peptide`` or an iterable of ``Peptide``
        :return: ``TAPPredictionResult`` built from ``{self.name: {peptide: score}}``
        :raises ValueError: if an input element is not a ``Peptide`` or if no
            prediction could be made for any peptide
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            if any(not isinstance(p, Peptide) for p in peptides):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs = {str(p): p for p in peptides}

        result = {self.name: {}}
        # groupby only merges *adjacent* items, so the sequences have to be
        # ordered by length first; otherwise one length can show up as
        # several separate groups.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
                continue

            encoding = self.encode(list(peps))

            # load svm model
            model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(self.name, length))
            model = svmlight.read_model(model_path)

            pred = svmlight.classify(model, encoding.values())
            # Accumulate into the shared dict; resetting it here would throw
            # away the scores of every previously processed length.
            for pep, score in zip(encoding.keys(), pred):
                result[self.name][pep_seqs[pep]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made with "+self.name+" for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
Esempio n. 5
0
    def score(self, feats):
        """Score ``feats`` with ``self._model`` according to ``self._classtype``.

        * ``"classifier"``: ``feats`` is converted with ``dict(feats)`` and the
          libsvm prediction is returned as an ``int``.
        * ``"structured"``: every entry of ``feats`` is classified on its own
          and the index of the highest-scoring entry is returned (``None`` if
          ``feats`` is empty).
        * ``"percrank"``: every entry is reduced to the set of its feature
          names, the model's ``project`` method is called, and the index of
          the largest projected value is returned.

        Any other class type falls through and returns ``None``.
        """
        m = self._model

        if self._classtype == "classifier":
            x, _ = svm.gen_svm_nodearray(dict(feats))
            return int(svm.libsvm.svm_predict(m, x))

        elif self._classtype == "structured":
            best_score = None
            best_idx = None
            for idx, candidate in enumerate(feats):
                # classify() returns a list; compare the scalar inside it
                # instead of relying on list-vs-int ordering.
                dec_val = svmlight.classify(m, [(0, candidate)])[0]
                if best_score is None or dec_val > best_score:
                    best_score = dec_val
                    best_idx = idx
            return best_idx

        elif self._classtype == "percrank":
            X = [set(f for f, v in candidate) for candidate in feats]
            Xisd = [0] * len(feats)
            Xisd[0] = 1
            dec_vals = m.project(X, Xisd)
            return dec_vals.index(max(dec_vals))
Esempio n. 6
0
 def predict_proba(self, X):
     """Return a two-column array ``[1 - s, s]`` for every row of ``X``.

     ``s`` is the logistic sigmoid of the value ``svmlight.classify``
     yields for the row; an all-zero placeholder label list is attached
     to the rows before conversion with ``self.toSvmlight``.
     """
     placeholder_labels = np.zeros(X.shape[0]).tolist()
     examples = self.toSvmlight(X, placeholder_labels)
     margins = np.array(svmlight.classify(self.model, examples))
     positive = 1 / (1 + np.exp(-margins))
     # turn the flat vector into a column so it can be stacked side by side
     positive.shape = (len(positive), 1)
     return np.hstack([1 - positive, positive])
Esempio n. 7
0
def run_svm(article_count, feature_functions, kernel='polynomial', split=0.9, model_path='svm.model'):
    """Train and evaluate a DEF/INDEF classifier with SVM-Light.

    Tokens tagged ``DEF`` become positive examples and tokens tagged
    ``INDEF`` negative ones; all other tokens are ignored.  The data is
    split with ``bifurcate``, a model is trained on the first part, written
    to ``model_path`` and evaluated on the second part.

    Returns a dict with the article/token/train/test counts, the kernel
    name and the ``correct`` / ``wrong`` / ``total`` tallies.
    """
    # https://bitbucket.org/wcauchois/pysvmlight
    articles, total_token_count = preprocess_wsj(article_count, feature_functions)

    vocabulary = Dictionary()
    vocabulary.add_one('ZZZZZ')  # so that no features are labeled 0
    examples = []
    for article in articles:
        for sentence in article:
            for tag, token_features in zip(sentence.def_tags, sentence.data):
                # only use def / indef tokens
                if tag not in ('DEF', 'INDEF'):
                    continue
                feature_ids = sorted(set(vocabulary.add(token_features)))
                label = +1 if tag == 'DEF' else -1
                # every present feature gets the value 1
                examples.append((label, zip(feature_ids, [1] * len(feature_ids))))

    train, test = bifurcate(examples, split, shuffle=True)

    # svmlight.learn options
    # type: select between 'classification', 'regression', 'ranking' (preference ranking), and 'optimization'.
    # kernel: select between 'linear', 'polynomial', 'rbf', and 'sigmoid'.
    # verbosity: set the verbosity level (default 0).
    # C: trade-off between training error and margin.
    # poly_degree: parameter d in polynomial kernel.
    # rbf_gamma: parameter gamma in rbf kernel.
    # coef_lin
    # coef_const
    # costratio (corresponds to -j option to svm_learn)
    model = svmlight.learn(train, type='classification', kernel=kernel)
    svmlight.write_model(model, model_path)

    gold_labels, test_feature_values = zip(*test)

    # the gold label is hidden from the classifier by passing 0 instead
    unlabeled = [(0, feature_values) for feature_values in test_feature_values]
    predictions = svmlight.classify(model, unlabeled)

    gold_is_positive = [(gold > 0) for gold in gold_labels]
    predicted_positive = [(prediction > 0) for prediction in predictions]
    correct, wrong = matches(gold_is_positive, predicted_positive)

    return {
        'total_articles_count': len(articles),  # int
        'total_token_count': total_token_count,  # int
        'train_count': len(train),  # int
        'test_count': len(test),  # int
        'kernel': kernel,
        'correct': correct,
        'wrong': wrong,
        'total': correct + wrong,
    }
Esempio n. 8
0
def test(test_data, fmodel_name):
  """Load the model stored at ``fmodel_name`` and print one score per example.

  Every value returned by ``svmlight.classify`` for ``test_data`` is printed
  on its own line with eight decimals.
  """
  print('[ test ] ===================')
  model = svmlight.read_model(fmodel_name)

  for score in svmlight.classify(model, test_data):
      print('%.8f' % score)
Esempio n. 9
0
def runSVMLight(trainName,testName, kerneltype, c_param = 1.0, gamma_param = 1.0, verbosity = 0):
    """
    Train an SVM-Light classifier and evaluate it on a test set.

    Arguments that are already lists are used directly; anything else is
    treated as a file name and loaded with ``sparseToList``.

    inputs: trainName, either the training data in svm-light format or the name of the training data file in LIBSVM/sparse format
            testName, either the test data in svm-light format or the name of the test data file in LIBSVM/sparse format
            kerneltype, (str)the type of kernel (linear, polynomial, sigmoid, rbf, custom)
            c_param, the C parameter (default 1)
            gamma_param, the gamma parameter (default 1)
            verbosity, 0, 1, or 2 for less or more information (default 0)

    outputs: the value returned by predictionCompare, documented by the
             original author as (positiveAccuracy, negativeAccuracy, accuracy)
    """
    # isinstance also accepts list subclasses, which an exact type
    # comparison would wrongly send down the "file name" path.
    if isinstance(trainName, list):
        trainingData = trainName
    else:
        trainingData = sparseToList(trainName)

    if isinstance(testName, list):
        testData = testName
    else:
        testData = sparseToList(testName)

    if verbosity == 2:
        print("Training svm.......")

    # train a model based on the data
    # NOTE(review): the learner's own verbosity is fixed at 2 and does not
    # follow the ``verbosity`` argument -- confirm this is intended.
    model = svmlight.learn(trainingData, type='classification', verbosity=2, kernel=kerneltype, C = c_param, rbf_gamma = gamma_param )

    if verbosity == 2:
        print("Classifying........")

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.
    predictions = svmlight.classify(model, testData)

    correctLabels = correctLabelRemove(testData)

    return predictionCompare(predictions, correctLabels, verbosity)
Esempio n. 10
0
def tsvm_test0():
    # data processing
    data, target = load_svmlight_file('dataset/following.scale')
    data, target = shuffle(data, target)
    target = binarize(target)[:,0]

    cutoff = int(round(data.shape[0] * 0.8))

    train_data = data[:cutoff]
    train_target = target[:cutoff]

    transductive_train_data = data
    transductive_target = target.copy()
    transductive_target[cutoff:] = 0

    test_data = data[cutoff:]
    test_target = target[cutoff:]

    # convert the data into svmlight format
    svm_train_data = npToSVMLightFormat(train_data, train_target)
    svm_transductive_train_data = npToSVMLightFormat(transductive_train_data,
            transductive_target)
    svm_test_data = npToSVMLightFormat(test_data, test_target)

    print 'labels in the training data'
    print countLabels(svm_transductive_train_data).most_common()

    # svmlight routine
    model = svmlight.learn(svm_train_data,
            j=3.0, kernel='linear', type='classification', verbosity=0)
    trans_model = svmlight.learn(svm_transductive_train_data,
            j=3.0, kernel='linear', type='classification', verbosity=0)

    predictions = svmlight.classify(model, svm_test_data)
    trans_predictions = svmlight.classify(trans_model, svm_test_data)

    print 'inductive learning'
    print accuracy(predictions, test_target)
    print '(recall, precision)', recall_precision(predictions, test_target)

    print 'transductive learning'
    print accuracy(trans_predictions, test_target)
    print '(recall, precision)', recall_precision(trans_predictions, test_target)
Esempio n. 11
0
def trainAndTest(training, test):
    """Train on ``training`` and classify ``test``.

    Every item supplies its example through ``dataTuple()``; test items
    additionally expose ``name`` and ``label``.  The result pairs each
    prediction with the label and name of the test item it belongs to.
    """
    train_examples = [item.dataTuple() for item in training]

    names = [item.name for item in test]
    test_examples = [item.dataTuple() for item in test]
    labels = [item.label for item in test]

    predictions = svmlight.classify(svmlight.learn(train_examples), test_examples)
    return zip(predictions, labels, names)
Esempio n. 12
0
def create_classifications(models, test_set):
    """Classify ``test_set`` once per model.

    Returns a dict that maps every key of ``models`` to the list
    ``svmlight.classify`` produces for the model stored under that key.
    """
    return {name: svmlight.classify(models[name], test_set)
            for name in models.keys()}
Esempio n. 13
0
def test_model(model,ind,n=3):
    """Classify n-gram features of selected "pos" and "neg" files with ``model``.

    Files are picked by position in the directory listing: positions from
    ``ind.get_pos_train_ind()`` for the ``pos`` directory (label 1) and from
    ``ind.get_neg_test_ind()`` for the ``neg`` directory (label -1).  Only
    n-grams known to ``fmap`` are used as features.

    Returns the list produced by ``svmlight.classify``.
    """
    def _example(label, directory, filename):
        # Read inside a context manager so the handle is closed right away.
        with open(directory + "/" + filename) as handle:
            counts = ngrams.ngrams(n, handle.read())
        return (label, [(fmap.getID(gram), count)
                        for gram, count in counts.items()
                        if fmap.hasFeature(gram)])

    test = []
    # List each directory once instead of once per selected file.
    pos_files = os.listdir("pos")
    # NOTE(review): positives are drawn from the *train* indices while the
    # negatives use the *test* indices -- confirm this asymmetry is intended.
    for i in ind.get_pos_train_ind():
        test.append(_example(1, "pos", pos_files[i]))
    neg_files = os.listdir("neg")
    for i in ind.get_neg_test_ind():
        test.append(_example(-1, "neg", neg_files[i]))
    return svmlight.classify(model, test)
def five_fold_validation(training_sets, validation_sets, c_value):
    """Return the mean accuracy over all training/validation pairs.

    For every position a classifier is trained on ``training_sets[i]`` with
    ``C=c_value`` and evaluated on ``validation_sets[i]``; the first element
    of what ``find_accuracy`` returns is averaged over the folds.
    """
    accuracy_sum = 0.0
    for fold, fold_training in enumerate(training_sets):
        model = svmlight.learn(fold_training, type='classification', C=c_value)
        raw_scores = svmlight.classify(model, validation_sets[fold])
        binary_predictions = change_to_binary_predictions(raw_scores)
        accuracy_sum += find_accuracy(validation_sets[fold], binary_predictions)[0]

    return accuracy_sum / len(training_sets)
Esempio n. 15
0
def trainAndTest(training, test):
    """Train on ``training`` and classify ``test``.

    Every record is indexable: position 0 is used as the name and positions
    1 and 2 form the example handed to the learner.  Each prediction is
    paired with element 1 and element 0 of the test record it belongs to.
    """
    def as_example(record):
        return (record[1], record[2])

    train_examples = [as_example(record) for record in training]

    names = [record[0] for record in test]
    test_examples = [as_example(record) for record in test]
    labels = [record[1] for record in test]

    model = svm.learn(train_examples)
    return zip(svm.classify(model, test_examples), labels, names)
Esempio n. 16
0
    def classify_rank_svm(self, features):
        """Score essay feature vectors with the trained ranking model.

        ``features`` is iterated row by row (numpy matrix/array); the return
        value is whatever ``svmlight.classify`` yields, one score per row.
        Requires ``self.model`` to be set.
        """
        assert self.model is not None

        # svmlight format: [(label, [(feature, value), ...], query_id), ...]
        # Feature numbers start at 1; label 0 and query id 1 are fixed.
        test_data = [
            (0, [(number, value) for number, value in enumerate(row, 1)], 1)
            for row in features
        ]

        return svmlight.classify(self.model, test_data)
Esempio n. 17
0
	def predict(self, x_test):
		"""Return, per sample, the index of the model with the highest score.

		Raises ``Exception`` if ``self.trained`` is not ``True``.
		"""
		if self.trained != True:
			raise Exception("first train a model")

		examples = self.svmlfeaturise(x_test)
		# one score vector per model, in the order of self.models
		per_model_scores = [
			np.array(svmlight.classify(model, examples))
			for model in self.models
		]

		return np.argmax(per_model_scores, axis=0)
Esempio n. 18
0
 def predict(self, X):
   """Predict a class for every row of ``X``.

   One score column is computed per model in ``self.model_``.  With a
   single model the class index is 1 where the score is positive and 0
   otherwise; with several models it is the column with the largest
   score.  The indices are used to look up ``self.classes_()``.
   """
   num_data = X.shape[0]
   scores = np.zeros((num_data, self.num_classes_,), dtype=np.float32)
   for i in range(self.num_classes_):
     # labels are placeholders (zeros); only the features are used here
     scores[:, i] = svm.classify(
         self.model_[i],
         self.__data2docs(X, np.zeros((num_data,), dtype=np.float32)))
   if self.num_classes_ == 1:
     # ``np.int`` was only an alias of the builtin and no longer exists in
     # current NumPy releases; the builtin yields the same dtype.
     indices = (scores.ravel() > 0).astype(int)
   else:
     indices = scores.argmax(axis=1)
   return self.classes_()[indices]
Esempio n. 19
0
def my_cross_val_score(data_fold, train, c_p):
    """Return one score per ``(x, y)`` pair in ``data_fold``.

    For each pair the two halves are materialised with
    ``collect_data_qid``, a linear ranking model is trained on the first
    half with ``C=c_p`` and the predictions for the second half are scored
    with ``my_accus``.
    """
    def fold_score(fit_part, eval_part):
        fit_data = collect_data_qid(fit_part, train)
        eval_data = collect_data_qid(eval_part, train)
        model = SVC.learn(fit_data, C=c_p, kernel='linear', type='ranking')
        return my_accus(eval_data, SVC.classify(model, eval_data))

    return [fold_score(x, y) for x, y in data_fold]
Esempio n. 20
0
    def predict(self, peptides, alleles=None, **kwargs):
        """Score peptides against alleles with allele/length-specific SVM models.

        :param peptides: a single ``Peptide`` or an iterable of ``Peptide``
        :param alleles: a single ``Allele``, an iterable of ``Allele`` or
            ``None``; with ``None`` every entry of ``self.supportedAlleles``
            is used
        :return: ``EpitopePredictionResult`` indexed by ``(Seq, Method)``
        :raises ValueError: on inputs of the wrong type or if no prediction
            could be made at all
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            if any(not isinstance(p, Peptide) for p in peptides):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs = {str(p): p for p in peptides}

        if alleles is None:
            alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
        # converted allele name (used in the model file name) -> Allele object
        alleles_string = {conv_a: a for conv_a, a in zip(self.convert_alleles(alleles), alleles)}

        result = {}
        # groupby only merges *adjacent* items, so the sequences have to be
        # ordered by length first; otherwise one length can show up as
        # several separate groups.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
                continue

            encoding = self.encode(list(peps))

            for a in alleles_string:
                model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(a,length))
                if not os.path.exists(model_path):
                    warnings.warn("No model exists for peptides of length %i or allele %s."%(length,
                                                                                            alleles_string[a].name))
                    continue
                # load svm model (once -- it used to be read twice in a row)
                model = svmlight.read_model(model_path)
                pred = svmlight.classify(model, encoding.values())
                # Reuse the allele's dict if it already exists; creating a
                # fresh one here dropped the scores of other lengths.
                allele_scores = result.setdefault(alleles_string[a], {})
                for pep, score in zip(encoding.keys(), pred):
                    allele_scores[pep_seqs[pep]] = score

        if not result:
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
Esempio n. 21
0
  def predict(self, dataset):
    """Return a ``(num_samples, 12)`` array with one prediction column per month.

    ``dataset`` is first passed to ``self._format_test_data``; column ``k``
    then holds what ``self.svm_list[k]`` predicts for
    ``self.formatted_data``.  Requires ``self.svm_list`` to be set.
    """
    assert self.svm_list is not None

    self._format_test_data(dataset)
    sample_count = dataset.getNumSamples()
    feature_count = dataset.getNumFeatures()  # queried but not used below

    month_count = 12
    monthly = np.zeros((sample_count, month_count))
    for month in range(month_count):
      month_model = self.svm_list[month]
      monthly[:, month] = svmlight.classify(month_model, self.formatted_data)
    return monthly
Esempio n. 22
0
 def __runSVMModels(self, img):
     """Score one image with every stored model.

     The pixel values are divided by their maximum, the stored average
     values are subtracted and the result is projected onto the first
     ``self.__numFaces`` eigenfaces.  The projection weights form a single
     example that is classified by each ``(name, model)`` pair.

     Returns a list of ``(name, score)`` tuples in model order.
     """
     pixels = asfarray(list(img.getdata()))
     scaled = pixels / max(pixels)
     centered = scaled - self.__imgdata.avgvals
     basis = self.__imgdata.eigenfaces[:self.__numFaces,:]
     weights = dot(basis, centered.transpose()).transpose()
     # label 0 is a placeholder; only the weights matter for scoring
     sample = [(0, self.__makeWeightTuplesList(weights))]
     return [(name, svmlight.classify(model, sample)[0])
             for (name, model) in self.__models]
Esempio n. 23
0
    def _get_svm_classification(self, featureset):
        """
        given a set of features, classify them with our trained model
        and return a signed float

        :param featureset: a dict of feature/value pairs in NLTK format, representing a single instance
        """
        instance_to_classify = (0, map_features_to_svm(featureset, self._svmfeatureindex))
        if self._verbose:
            print 'instance', instance_to_classify
        # svmlight.classify expects a list; this should be taken advantage of when writing SvmClassifier.batch_classify / .batch_prob_classify.
        # it returns a list of floats, too.
        [prediction] = svmlight.classify(self._model, [instance_to_classify])
        return prediction
Esempio n. 24
0
    def predict(self, value, converted=False):
        """Classify a single value with the trained model.

        Args:
            value: List-like object with same dimensionality used for training
                or the result of using convert_value if converted=True.
            converted: If True then the input is in the correct internal format

        Returns:
            One-element list ``[(confidence, label)]`` where confidence is the
            absolute value of the score and label is ``cmp(score, 0)``.
        """
        features = value if converted else self.convert_value(value)
        scores = svmlight.classify(self._m, [(0, features)])
        conf = scores[0]
        magnitude = math.fabs(conf)
        sign = cmp(conf, 0)
        return [(magnitude, sign)]
Esempio n. 25
0
	def get_weather_tweets(self, tweets):
		"""Return the tweets whose first classifier score is negative.

		A non-list argument is treated as a single tweet.  Each tweet is
		stemmed, converted for svmlight and classified with
		``self.is_weather_model``; a running count is printed after every
		100 tweets.
		"""
		if not isinstance(tweets, list):
			tweets = [tweets]
		selected = []
		for position, tweet in enumerate(tweets, 1):
			stemmed = self.parser.stem_sentence_porter(tweet)
			example = self.format_tweet_for_svmlight(stemmed)
			scores = svmlight.classify(self.is_weather_model, example)
			if position % 100 == 0:
				print(position)
			if scores[0] < 0:
				selected.append(tweet)
		return selected
Esempio n. 26
0
File: SVM.py Progetto: FRED-2/Fred2
    def predict(self, peptides,  **kwargs):
        """
        Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param kwargs: optional ``chunks``: number of peptides encoded and classified per batch
                       (defaults to the number of distinct peptide sequences)
        :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
        :raises ValueError: if an input element is not a Peptide or no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides):peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        chunksize = kwargs.get('chunks', len(pep_seqs))

        result = {self.name: {}}
        # group peptides by length (groupby needs the keys sorted by length)
        pep_groups = sorted(pep_seqs, key=len)
        for length, peps in itertools.groupby(pep_groups, key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
                continue

            # load svm model once per length; the file depends only on the
            # predictor name and the length, not on the chunk
            model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(self.name, length))
            model = svmlight.read_model(model_path)

            peps = list(peps)
            for i in range(0, len(peps), chunksize):
                encoding = self.encode(peps[i:i+chunksize])

                pred = svmlight.classify(model, encoding.values())
                for pep, score in zip(encoding.keys(), pred):
                    result[self.name][pep_seqs[pep]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made with "+self.name+" for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
Esempio n. 27
0
def rec_char(div_img):
    """
    Recognize a single character that has already been segmented out.

    The image is binarized and formatted, then scored against the ten
    models stored as "model/0" ... "model/9".  The result is the digit of
    the last model whose first score is positive, or "" if none is.
    """
    sample = chformat(binary(div_img))
    result = ""
    for digit in range(10):
        model = svmlight.read_model("model/" + str(digit))
        scores = svmlight.classify(model, sample)
        if scores[0] > 0:
            result = str(digit)

    return result
Esempio n. 28
0
 def predict(self, x):
     """Return, per sample, the index of the model with the highest score.

     Raises ``Exception`` when the estimator is not fitted or holds no
     models.
     """
     if not self.fitted:
         raise Exception('Not fitted yet')
     if len(self.models) < 1:
         raise Exception("len(self.models) < 1")

     # label 0 is a placeholder for every example
     examples = [(0, feat) for feat in toSVMLightFeatures(x)]

     per_model_scores = [
         np.array(svmlight.classify(model, examples))
         for model in self.models
     ]
     return np.argmax(np.vstack(tuple(per_model_scores)), axis=0)
Esempio n. 29
0
def zrank(aids, topic, fmodel_name):
  """Re-rank ``aids`` for ``topic`` with the model stored at ``fmodel_name``.

  The first element of each row produced by ``init_rerank_data`` is paired
  with its score, the pairs are ordered by descending score, dumped to
  ``RERANK_RESULT + '_' + topic`` and the ordered first elements are
  returned.  ``ZC.dump_cache()`` is called before returning.
  """
  rerank_data = init_rerank_data(aids, topic)

  print('[ zrank ] ===================')
  model = svmlight.read_model(fmodel_name)
  scores = svmlight.classify(model, rerank_data)

  ranked = sorted(
      zip([row[0] for row in rerank_data], scores),
      key=lambda pair: pair[1],
      reverse=True)

  with open(RERANK_RESULT + '_' + topic, 'w') as out:
    pprint.pprint(ranked, out)

  ZC.dump_cache()

  return [pair[0] for pair in ranked]
Esempio n. 30
0
    def predict(self, X):
        """Retrain the model and return 0/1 predictions for the rows of ``X``.

        When ``self.class_dist`` is set, the model is learned from the
        training data plus the (zero-labelled) test rows, passing
        ``self.class_dist[1]`` as ``transduction_posratio``; otherwise it is
        learned from the training data alone.  Scores above 0 are mapped to
        1 and scores at or below 0 to 0.
        """
        placeholder_labels = np.zeros(X.shape[0]).tolist()
        test_data = self.toSvmlight(X, placeholder_labels)
        combined = self.train_data + test_data

        if not self.class_dist:
            self.model = svmlight.learn(self.train_data)
        else:
            self.model = svmlight.learn(
                combined, verbosity=1,
                transduction_posratio=self.class_dist[1])

        labels = np.array(svmlight.classify(self.model, test_data))
        # threshold the scores in place
        labels[labels > 0] = 1
        labels[labels <= 0] = 0

        return labels
Esempio n. 31
0
def test_model(model, ind, n=3):
    """Classify n-gram features of selected "pos" and "neg" files with ``model``.

    Files are picked by position in the directory listing: positions from
    ``ind.get_pos_train_ind()`` for the ``pos`` directory (label 1) and from
    ``ind.get_neg_test_ind()`` for the ``neg`` directory (label -1).  Only
    n-grams known to ``fmap`` are used as features.

    Returns the list produced by ``svmlight.classify``.
    """
    def _example(label, directory, filename):
        # Read inside a context manager so the handle is closed right away.
        with open(directory + "/" + filename) as handle:
            counts = ngrams.ngrams(n, handle.read())
        features = [(fmap.getID(gram), count)
                    for gram, count in counts.items()
                    if fmap.hasFeature(gram)]
        return (label, features)

    test = []
    # List each directory once instead of once per selected file.
    pos_files = os.listdir("pos")
    # NOTE(review): positives are drawn from the *train* indices while the
    # negatives use the *test* indices -- confirm this asymmetry is intended.
    for i in ind.get_pos_train_ind():
        test.append(_example(1, "pos", pos_files[i]))
    neg_files = os.listdir("neg")
    for i in ind.get_neg_test_ind():
        test.append(_example(-1, "neg", neg_files[i]))
    return svmlight.classify(model, test)
Esempio n. 32
0
def test_svmlight():
    """Smoke-test svmlight on a tiny hand-written data set.

    Trains a classifier on four positive and six negative examples, writes
    the model to ``my_model.dat`` and prints the score of two unlabeled
    examples with eight decimals.
    """
    positive_rows = [
        [(1, 2), (2, 5), (3, 6), (5, 1), (4, 2), (6, 1)],
        [(1, 2), (2, 1), (3, 4), (5, 3), (4, 1), (6, 1)],
        [(1, 2), (2, 2), (3, 4), (5, 1), (4, 1), (6, 1)],
        [(1, 2), (2, 1), (3, 3), (5, 1), (4, 1), (6, 1)],
    ]
    negative_rows = [
        [(1, 2), (2, 1), (3, 1), (5, 3), (4, 2), (6, 1)],
        [(1, 1), (2, 1), (3, 1), (5, 3), (4, 1), (6, 1)],
        [(1, 1), (2, 2), (3, 1), (5, 3), (4, 1), (6, 1)],
        [(1, 1), (2, 1), (3, 1), (5, 1), (4, 3), (6, 1)],
        [(1, 2), (2, 1), (3, 1), (5, 2), (4, 1), (6, 5)],
        [(7, 10)],
    ]
    training_data = ([(1, row) for row in positive_rows] +
                     [(-1, row) for row in negative_rows])

    unlabeled_rows = [
        [(1, 2), (2, 6), (3, 4), (5, 1), (4, 1), (6, 1)],
        [(1, 2), (2, 6), (3, 4)],
    ]
    test_data = [(0, row) for row in unlabeled_rows]

    model = svmlight.learn(training_data, type='classification', verbosity=0)
    svmlight.write_model(model, 'my_model.dat')
    for score in svmlight.classify(model, test_data):
        print('%.8f' % score)
Esempio n. 33
0
def performDoc2VecJudgement(trainingData, testData):
    """Classify test documents with an SVM trained on inferred doc2vec vectors.

    Documents are ``(sentiment, fileName, features)`` triples.  Training
    documents are labelled 1 when their sentiment is 'POS' and -1
    otherwise; test documents get the placeholder label 0.

    Returns a list of ``(judgement, sentiment, fileName)`` tuples, one per
    test document and in the same order.
    """
    doc2vecModel = Doc2Vec.load("/Users/Matteo/Desktop/doc2vec_models/final_model")

    def numbered(vector):
        # svmlight feature numbers start at 1
        return [(position + 1, value) for position, value in enumerate(vector)]

    labelled_train = [(1 if doc[0] == 'POS' else -1, doc2vecModel.infer_vector(doc[2]))
                      for doc in trainingData]
    labelled_test = [(0, doc2vecModel.infer_vector(doc[2])) for doc in testData]

    train_examples = [(pair[0], numbered(pair[1])) for pair in labelled_train]
    test_examples = [(pair[0], numbered(pair[1])) for pair in labelled_test]

    svmModel = svmlight.learn(train_examples)
    judgements = svmlight.classify(svmModel, test_examples)

    return [(judgements[position], sentiment, fileName)
            for position, (sentiment, fileName, features) in enumerate(testData)]
Esempio n. 34
0
    def evaluate(self, docs):
        """Classify ``docs`` and report which ones were labelled correctly.

        ``docs`` holds ``(classification, fname, wordlist)`` triples.  Words
        whose stored id is missing or 0 are assigned the next free id from
        ``self.curr_id``.  A document counts as correct when a negative score
        meets "NEG" or a positive score meets "POS".

        Returns ``(corrects, accuracy)``: a list of booleans and the fraction
        of ``True`` entries.
        """
        data = []
        for classification, fname, wordlist in docs:
            features = []
            for word, occurrences in Counter(wordlist).iteritems():
                if self.word_ids.get(word, 0) == 0:
                    self.word_ids[word] = self.curr_id
                    self.curr_id += 1

                features.append((self.word_ids.get(word), occurrences))
            # order the features by ascending id
            features.sort(key=lambda pair: pair[0])
            label = 1 if classification == "POS" else -1
            data.append((label, features))
        results = svmlight.classify(self.model, data)

        corrects = [
            bool((r < 0 and docs[i][0] == "NEG") or (r > 0 and docs[i][0] == "POS"))
            for i, r in enumerate(results)
        ]
        return (corrects, float(sum(corrects))/len(corrects))
Esempio n. 35
0
def main_svmlight():
    # copied:
    import svmlight
    import pdb

    training_data = syntheticData(30, 1)
    test_data = syntheticData(30, 1)
    #training_data = __import__('data').train0
    #test_data = __import__('data').test0

    print 'HERE 0'
    print 'training_data is', training_data
    print 'test_data is', test_data

    # train a model based on the data
    #pdb.set_trace()
    print 'HERE 1'
    model = svmlight.learn(training_data,
                           type='regression',
                           kernelType=2,
                           verbosity=3)
    print 'HERE 2'

    # model data can be stored in the same format SVM-Light uses, for interoperability
    # with the binaries.
    svmlight.write_model(model, 'my_model.dat')
    print 'HERE 3'

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.
    #predictions = svmlight.classify(model, test_data)
    pdb.set_trace()
    predictions = svmlight.classify(model, training_data)
    print 'HERE 4'

    for p, example in zip(predictions, test_data):
        print 'pred %.8f, actual %.8f' % (p, example[0])
Esempio n. 36
0
    def score(self, websites):
        """Score every website with the trained classifier ``self.clf``.

        Each website is vectorised with ``get_vsm``, converted to svmlight
        format and classified.  An error message is printed when
        ``self.clf`` is not set, but scoring is still attempted.

        Returns a list of ``(website, score)`` tuples in input order.
        """
        # NOTE: a disabled in-place training path (all unlabelled websites as
        # negatives, capped at 20x the positives) used to sit here inside a
        # string literal; it was never executed.
        print("Scoring...")
        if not self.clf:
            print("Error. Classifier must be trained")
        vectors = np.array(
            [site.get_vsm(self.vectorizer, self.text_type) for site in websites])
        examples = self._convert_to_svmlight(vectors, -1)  # The label does not matter
        predicts = svmlight.classify(self.clf, examples)
        return [(websites[position], predicts[position])
                for position in xrange(len(websites))]
Esempio n. 37
0
def svm(train_docs, train_labels, test_docs, params):
    """Train an SVM-light classifier and predict 0/1 labels for ``test_docs``.

    ``params`` is a ``(kernel, param)`` pair. ``kernel`` selects the kernel:
    ``'rbf'`` uses ``param`` as ``rbf_gamma``; ``'poly'`` or ``'polynomial'``
    uses ``param`` as ``poly_degree``. Training labels equal to 1 are mapped
    to +1, everything else to -1.

    Returns a list with 1 where the margin is strictly positive, else 0.
    Raises ``ValueError`` for any other kernel name.
    """
    kernel, param = params

    signed_labels = (1 if l == 1 else -1 for l in train_labels)
    train_docs_svm = to_svmlight_format(train_docs, signed_labels)
    # The test labels are placeholders (zeros); only the features are used.
    test_docs_svm = to_svmlight_format(test_docs, np.zeros(test_docs.shape[0]))

    if kernel == 'rbf':
        kernel_options = {'kernel': 'rbf', 'rbf_gamma': param}
    elif kernel in ('poly', 'polynomial'):
        kernel_options = {'kernel': 'polynomial', 'poly_degree': param}
    else:
        raise ValueError('Unsupported svm parameters: ' + str(params))

    model = svmlight.learn(train_docs_svm,
                           type='classification',
                           **kernel_options)

    margins = svmlight.classify(model, test_docs_svm)
    return [int(p > 0) for p in margins]
Esempio n. 38
0
    # NOTE(review): excerpt from a larger function. img, model, window_width,
    # window_height, scale_factor, positive_threshold, orientations, cell, block,
    # centerx_list and centery_list are all defined outside this view.
    rady_list = []

    iteration = 1
    max_pred = -1  # highest prediction seen so far among accepted windows

    # Keep going while the image is at least as large as the detection window.
    # len(img[0]) is compared with the window width, len(img) with its height.
    # NOTE(review): nothing visible here shrinks img or increments iteration;
    # presumably that happens further down in the loop body - confirm.
    while len(img[0]) >= window_width and len(img) >= window_height:

        print iteration
        # HOG descriptor of the whole current image.
        hog = feature.hog(img,
                          orientations=orientations,
                          pixels_per_cell=(cell, cell),
                          cells_per_block=(block, block),
                          normalise=True)
        # split_vector yields parallel sequences: keys (later split on "^" into
        # two integer coordinates) and the examples passed to the classifier.
        testing_data_keys, testing_data_tuples = split_vector(
            hog.tolist(), len(img[0]), len(img))
        predictions = svmlight.classify(model, testing_data_tuples)

        # Factor applied to coordinates and window half-sizes for this iteration.
        # NOTE(review): presumably maps positions back to the original image
        # scale - confirm the direction against the resizing code.
        scale = (1.0 / scale_factor)**iteration
        for i in xrange(len(predictions)):
            prediction = float(predictions[i])

            # Only windows scoring at or above the threshold are recorded.
            if prediction >= positive_threshold:
                max_pred = max(max_pred, prediction)
                coordinate = testing_data_keys[i].split("^")
                centerx = int(int(coordinate[0]) * scale)
                centery = int(int(coordinate[1]) * scale)
                # window_* / 2 is integer division under Python 2 when both are ints.
                radx = int((window_width / 2) * scale)
                rady = int((window_height / 2) * scale)

                centerx_list.append(centerx)
                centery_list.append(centery)
Esempio n. 39
0
 def predict(self, sample_):
     """Return whatever ``SVC.classify`` yields for ``sample_`` with ``self.model``."""
     return SVC.classify(self.model, sample_)
Esempio n. 40
0
def run_svm(article_count,
            feature_functions,
            kernel='polynomial',
            split=0.9,
            model_path='svm.model'):
    """Train and evaluate an SVM-light DEF/INDEF token classifier.

    Articles come from ``preprocess_wsj(article_count, feature_functions)``.
    Only tokens tagged ``'DEF'`` (label +1) or ``'INDEF'`` (label -1) are
    used. The examples are divided by ``bifurcate(data, split, shuffle=True)``,
    a model is trained on the first part, written to ``model_path`` and
    evaluated on the second part.

    Returns a dict with the article/token/train/test counts, the kernel name
    and the ``correct`` / ``wrong`` / ``total`` prediction counts.
    """
    # pysvmlight: https://bitbucket.org/wcauchois/pysvmlight
    articles, total_token_count = preprocess_wsj(article_count,
                                                 feature_functions)

    dictionary = Dictionary()
    dictionary.add_one('ZZZZZ')  # so that no features are labeled 0

    data = []
    for article in articles:
        for sentence in article:
            for tag, token_features in zip(sentence.def_tags, sentence.data):
                # Tokens that are neither DEF nor INDEF are ignored entirely.
                if tag == 'DEF':
                    label = +1
                elif tag == 'INDEF':
                    label = -1
                else:
                    continue
                feature_ids = sorted(set(dictionary.add(token_features)))
                feature_values = zip(feature_ids, [1] * len(feature_ids))
                data.append((label, feature_values))

    train, test = bifurcate(data, split, shuffle=True)

    # for corpus, name in [(train, 'train'), (test, 'test')]:
    # write_svm(corpus, 'wsj_svm-%s.data' % name)

    # Options understood by svmlight.learn:
    #   type:        'classification', 'regression', 'ranking' (preference
    #                ranking) or 'optimization'
    #   kernel:      'linear', 'polynomial', 'rbf' or 'sigmoid'
    #   verbosity:   verbosity level (default 0)
    #   C:           trade-off between training error and margin
    #   poly_degree: parameter d of the polynomial kernel
    #   rbf_gamma:   parameter gamma of the rbf kernel
    #   coef_lin, coef_const
    #   costratio:   corresponds to the -j option of svm_learn
    model = svmlight.learn(train, type='classification', kernel=kernel)
    svmlight.write_model(model, model_path)

    gold_labels, test_feature_values = zip(*test)

    # The classifier gets a placeholder label of 0 for every test example.
    test_pairs = [(0, feature_values)
                  for feature_values in test_feature_values]
    predictions = svmlight.classify(model, test_pairs)

    gold_positive = [(gold > 0) for gold in gold_labels]
    predicted_positive = [(prediction > 0) for prediction in predictions]
    correct, wrong = matches(gold_positive, predicted_positive)

    return {
        'total_articles_count': len(articles),  # int
        'total_token_count': total_token_count,  # int
        'train_count': len(train),  # int
        'test_count': len(test),  # int
        'kernel': kernel,
        'correct': correct,
        'wrong': wrong,
        'total': correct + wrong,
    }
Esempio n. 41
0
 def predict(self, X):
     """Convert ``X`` with ``self.__createData`` and return the SVM-light outputs as a numpy array."""
     examples = self.__createData(X)
     scores = svmlight.classify(self.model, examples)
     return numpy.array(scores)
Esempio n. 42
0
        # NOTE(review): excerpt from the body of enclosing loop(s); it, cost_factor,
        # train, test, val, dump, dataset, features, ts and the accuracies / f1_*
        # lists are defined outside this view.
        print("Training it=", it, "cost-factor=", cost_factor + 1)

        # Train a classification model; the cost ratio passed is cost_factor + 1.
        model = svmlight.learn(list(train),
                               type='classification',
                               verbosity=0,
                               costratio=cost_factor +
                               1)  ## costratio = cost-factor

        # Optionally persist the model; the file name encodes dataset, feature
        # set, iteration, cost factor and the ts value.
        if dump == "yes":
            svmlight.write_model(
                model,
                "models/model_" + dataset + "_" + features + "_it" + str(it) +
                "_cost_fact" + str(cost_factor + 1) + "_" + ts + ".dat")

        predictions = svmlight.classify(model, test)
        # Printed after classification has already finished.
        print("Predicting it=", it, "cost-factor=", cost_factor + 1)

        tp, tn, fp, fn = evaluate(predictions)
        # NOTE(review): the argument order here (tn, tp, fn, fp) differs from the
        # unpacking order above - verify against weighted_accuracy's signature.
        accuracies.append(
            weighted_accuracy(cost_factor + 1, tn, tp, fn, fp) * 100)
        # Collapse the raw outputs to -1 / +1 labels before computing F1.
        # NOTE(review): an output of exactly 0 stays 0 and would show up as a
        # third label in f1_score - confirm this cannot occur.
        predictions = np.array(predictions)
        predictions[predictions < 0] = -1
        predictions[predictions > 0] = 1
        f1_micro.append(
            f1_score(val, predictions, average='micro')
        )  # micro: calculates metrics totally by counting the total true positives, false negatives and false positives
        cl = f1_score(val, predictions,
                      average=None)  # none: returns scores for each class
        # NOTE(review): which label cl[0] / cl[1] refer to depends on the label
        # values present in val and predictions - confirm the rel/unrel mapping.
        f1_rel.append(cl[0])
        f1_unrel.append(cl[1])
Esempio n. 43
0
# Maps a classifier index to the list of test-document indices it scored positive.
predictions_by_code = {}

def load_classifier(clf_i):
    """Read the SVM-light model stored in ``clf_directory`` under the name ``str(clf_i)``."""
    model_path = os.path.join(clf_directory, str(clf_i))
    return svmlight.read_model(model_path)


########################################
## Make predictions for each of the ICD9 codes

classifiers = {}
for i in range(I):
    if os.path.exists(os.path.join(clf_directory,str(i))):
        print i, 'of', I
        clf = load_classifier(i)
        preds = np.array(svmlight.classify(clf, zip(np.zeros(len(test_text)),test_text)))
        predictions_by_code[i] = [j for j in range(len(preds)) if preds[j] > 0]

# Invert predictions
predictions = [[root] for x in range(TD)]
for code, docs in predictions_by_code.iteritems():
    for doc in docs:
        predictions[doc].append(code)


# Prune the predictions to respect the conditional classification
# constraint (all ancestors must be predicted true for a child to be
# predicted true)
# NOTE(review): the loop body is cut off in this excerpt; only the per-document
# set-up is visible, the filtering itself is not.
for i in range(len(predictions)):
    # De-duplicated codes predicted for document i.
    doc_predictions = set(list(predictions[i]))
    # Presumably collects the codes that survive pruning - confirm in the full source.
    filtered_predictions = []
Esempio n. 44
0
File: SVM.py Progetto: koalive/Fred2
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for given peptides and alleles. If no alleles are given, predictions for all available
        models are made.

        Only peptides of length 9 are scored; for every other length a warning is issued and the peptides of that
        length are skipped.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input is not a Peptide / Allele, or if no prediction could be made at all
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            al = alleles
        # converted allele name -> Allele object
        allales_string = {conv_a: a for conv_a, a in itertools.izip(self.convert_alleles(al), al)}

        result = {}

        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
        # model_path = os.path.abspath("../Data/svms/%s/%s"%(self.name, self.name))
        model = svmlight.read_model(model_path)

        # groupby only merges *adjacent* items with equal keys, so the sequences must be sorted by length first;
        # otherwise one length can be split over several groups (and warned about several times).
        for length, peps in itertools.groupby(sorted(pep_seqs.iterkeys(), key=len), key=len):
            peps = list(peps)
            if length != 9:
                warnings.warn("Peptide length of %i is not supported by UniTope" % length)
                continue

            for a, allele in allales_string.iteritems():
                if allele.name in self.supportedAlleles:
                    encoding = self.encode(peps, a)
                    pred = svmlight.classify(model, encoding.values())
                    # Accumulate instead of resetting, so scores already stored for this allele are never dropped.
                    scores = result.setdefault(allele, {})
                    for pep, score in itertools.izip(encoding.keys(), pred):
                        scores[pep_seqs[pep]] = score

        if not result:
            # Adjacent literals keep source indentation out of the message.
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
Esempio n. 45
0
def load_classifier(clf_i):
    """Load and return the SVM-light model saved as ``str(clf_i)`` inside ``clf_directory``."""
    model_file = os.path.join(clf_directory, str(clf_i))
    return svmlight.read_model(model_file)


########################################
## Make predictions for each of the ICD9 codes

classifiers = {}
for i in range(I):
    if os.path.exists(os.path.join(clf_directory, str(i))):
        print i, 'of', I
        clf = load_classifier(i)
        preds = np.array(
            svmlight.classify(clf, zip(np.zeros(len(test_text)), test_text)))
        predictions_by_code[i] = [j for j in range(len(preds)) if preds[j] > 0]

# Invert predictions
predictions = [[root] for x in range(TD)]
for code, docs in predictions_by_code.iteritems():
    for doc in docs:
        predictions[doc].append(code)

# Prune the predictions to respect the conditional classification
# constraint (all ancestors must be predicted true for a child to be
# predicted true)
for i in range(len(predictions)):
    doc_predictions = set(list(predictions[i]))
    filtered_predictions = []
    for prediction in doc_predictions: