Example #1
def self_training2(X, y, X_unLabeled, param, th):
    model = svmutil.svm_train(svmutil.svm_problem(x=X.tolist(), y=y.tolist()), param)
    obj = model.get_objective_value()[0]
    itr_num = 0

    while True:
        predicted_labels = np.array(svmutil.svm_predict(x=X_unLabeled.tolist(),
                                                        y=[1]*len(X_unLabeled),
                                                        m=model,
                                                        options="-q")[0])
        model = svmutil.svm_train(svmutil.svm_problem(x=np.append(X, X_unLabeled, axis=0).tolist(),
                                                      y=np.append(y, predicted_labels).tolist()), param)
        obj_new = model.get_objective_value()[0]
        itr_num += 1

        if abs(obj_new - obj) < th:
            break
        else:
            obj = obj_new

    y_unlabeled = ma.array(data=np.array(svmutil.svm_predict(x=X_unLabeled.tolist(),
                                                             y=[1]*len(X_unLabeled),
                                                             m=model,
                                                             options="-q")[0]),
                           mask=[True]*len(X_unLabeled))

    return model, y_unlabeled, obj_new, itr_num
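A note on this example: get_objective_value() is not part of stock libsvm's Python API, so the snippet presumably targets a patched build. A minimal sketch of the same self-training loop using only standard svmutil calls, stopping when the pseudo-labels stabilize rather than when the objective converges (assumes param is an option string such as '-c 1 -q'):

from libsvm import svmutil

def self_training_label_stable(X, y, X_unlabeled, param, max_iter=50):
    # train on the labeled data, then iterate: pseudo-label, retrain
    model = svmutil.svm_train(y.tolist(), X.tolist(), param)
    prev = None
    for _ in range(max_iter):
        pred = svmutil.svm_predict([0] * len(X_unlabeled),
                                   X_unlabeled.tolist(), model, '-q')[0]
        if pred == prev:  # pseudo-labels stopped changing
            break
        prev = pred
        model = svmutil.svm_train(y.tolist() + pred,
                                  X.tolist() + X_unlabeled.tolist(), param)
    return model, prev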
Example #2
 def predict(self, features, const_intercept=False):
     """
     Run prediction using svm_predict.
     :param features: List of features, produced by svr_features_from_sequence
     :param model: A loaded svm model (from load_model)
     :param const_intercept: if true, add a 1:1 term at the beginning of the matrix. Must match model's term
     :return: triple of predictions, accuracy, and values from svm_predict.
     """
     feature_size = len(features)
     if const_intercept:
         feature_size += 1  # If we are to use a const intercept term, we will have one more feature
     #if 'size' in self.modeldict and self.modeldict['size'] != feature_size:
     #    # vm Exception: Model size 1536 does not match feature size 384.
     #    raise Exception('Model size {} does not match feature size {}.\nPlease check parameters for width, '
     #                    'kmers, and const_intercept'.format(self.modeldict['size'], feature_size))
     svm_matrix = dict()
     # Build the dictionary that corresponds to the matrix file
     offset = 1  # svm_matrix is a dictionary of index to value, starting at 1
     if const_intercept:
         svm_matrix[offset] = 1
         offset += 1
     for i, feature in enumerate(features):
         svm_matrix[i + offset] = feature['value']
     predictions = svmutil.svm_predict([1.0], [svm_matrix],
                                       self.modeldict['model'], '-q')
     return predictions
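The svm_matrix dict above illustrates libsvm's sparse input format: x may be a list of dicts mapping 1-based feature indices to values, and missing indices are implicit zeros. A standalone sketch (the model path is hypothetical):

from libsvm import svmutil

model = svmutil.svm_load_model('svr.model')   # hypothetical model file
sample = {1: 1.0, 2: 0.25, 5: -0.75}          # indices 3 and 4 default to 0
labels, acc, vals = svmutil.svm_predict([1.0], [sample], model, '-q')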
Example #3
def test_libsvm_trains_correctly(heart_scale):
    y, x = heart_scale
    m = svm_train(y[:200], x[:200], '-c 4')
    p_label, p_acc, p_val = svm_predict(y[200:], x[200:], m)
    assert p_acc == pytest.approx(
        [84.28571428571429, 0.6285714285714286, 0.463744141163496])
    assert p_label[:6] == [-1.0, 1.0, 1.0, -1.0, 1.0, -1.0]
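The p_acc triple asserted here follows libsvm's documented layout: classification accuracy (%), mean squared error, and squared correlation coefficient. Unpacked explicitly, continuing from the test above:

p_label, p_acc, p_val = svm_predict(y[200:], x[200:], m)
accuracy_pct, mse, scc = p_acc  # accuracy (%), MSE, squared correlation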
Example #4
def predict_emotion_paef():
    model = svmutil.svm_load_model(
        "C:/Users/Admin/PycharmProjects/Emotion_Detection/trained_models/paef_models/artphoto_train.txt.model"
    )
    mapping = {
        0: "happy",
        2: "fear",
        3: "excitement",
        4: "disgust",
        6: "anger",
        7: "sad"
    }

    with open('image_data.txt', 'r') as feature:
        for line in feature:
            line = line.strip()
            feature_vector = line.split()[1:]
            feature_vector = [
                float(item.split(":")[-1]) for item in feature_vector
            ]

        # note: this call sits outside the for-loop, so it predicts on the
        # feature vector parsed from the last line of image_data.txt
        p_labs, p_acc, p_vals = svmutil.svm_predict([0], [feature_vector],
                                                    model)
        print("p_labs")
        print(p_labs)
        lab = p_labs[0]
        return mapping[int(lab)]
Example #5
def sliding_window_search(img,
                          motion_img,
                          svm,
                          method,
                          feature,
                          sbox_height,
                          sbox_width,
                          slide=10,
                          threshold=0.2):
    detections = []
    det_count = 0
    height, width = img.shape

    for i in range(1, (height - sbox_height), slide):
        for j in range(1, (width - sbox_width), slide):
            img_patch = img[i:i + sbox_height - 1, j:j + sbox_width - 1]
            #motion_patch = motion_img[i:i+sbox_height-1, j:j+sbox_width-1]
            #img_feat = extract(img_patch, motion_patch, method, feature)
            img_feat = extract(img_patch, None, method, feature)
            y = []
            y.append(0)
            x = []
            x.append(img_feat)
            plabel, acc, pr = svm_predict(y, x, svm)
            if pr[0][0] > threshold:
                #print "pr: " + str(pr[0][0])
                detections.append(
                    [i, i + sbox_height - 1, j, j + sbox_width - 1, pr[0][0]])
    return detections
Example #6
    def accuracy(self):
        tweets = common.get_filtered_training_data(self.training_datafile)
        test_tweets = []
        for (t, l) in tweets:
            words_filtered = [
                e.lower() for e in t.split() if (common.is_ascii(e))
            ]
            test_tweets.append(words_filtered)

        test_feature_vector = helper.get_SVM_feature_vector(
            self.feature_list, test_tweets)
        p_labels, p_accs, p_vals = svm_predict([0] * len(test_feature_vector),
                                               test_feature_vector,
                                               self.classifier)
        count = 0
        total, correct, wrong = 0, 0, 0
        self.accuracy = 0.0
        for (t, l) in tweets:
            label = p_labels[count]
            if (label == 0):
                label = 'positive'
            elif (label == 1):
                label = 'negative'
            elif (label == 2):
                label = 'neutral'

            if (label == l):
                correct += 1
            else:
                wrong += 1
            total += 1
            count += 1
        accuracy = (float(correct) / total) * 100
        print 'Total = {}, Correct = {}, Wrong = {}, Accuracy = {}'.format(
            total, correct, wrong, accuracy)
Example #7
 def predict(self, X, options=""):
     if hasattr(X, 'shape'):
         X = X.tolist()
     res = svmutil.svm_predict([0 for i in X],
                               list(X),
                               self.model,
                               options=options)
     return res
Example #8
 def evaluate(self, input_data_path):
     with commons.PhaseLogger("LIBSVM.evaluate.read_problem"):
         Y, X = svmutil.svm_read_problem(input_data_path + "\\Test.txt")
     with commons.PhaseLogger("LIBSVM.evaluate.predict"):
         p_labels, p_acc, p_vals = svmutil.svm_predict(Y, X, self._model)
     acc, mse, _ = p_acc
     logging.info("[%s]: evaluate with Acc[%.4f] Mse[%.4f]" %
                  (self._get_class_name(), acc, mse))
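The hard-coded "\\Test.txt" separator only works on Windows; a portable sketch with the same names would join the path instead:

import os

test_path = os.path.join(input_data_path, "Test.txt")  # works on any OS
Y, X = svmutil.svm_read_problem(test_path)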
Example #9
    def _label(self, x):
        if isinstance(x, (list, tuple, numx.ndarray)):
            y = [0] * len(x)
            # only ndarrays have .tolist(); plain sequences pass through as-is
            xs = x.tolist() if isinstance(x, numx.ndarray) else list(x)
            p_labs, p_acc, p_vals = libsvmutil.svm_predict(y, xs, self.model)

            return numx.array(p_labs)
        else:
            msg = "Data must be a sequence of vectors"
            raise mdp.NodeException(msg)
Example #10
def process(training_file, test_file, check, draw):
    # Load training data.
    with open(training_file) as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)

    # Convert data to lists for libsvm.
    class_1 = map(list, class_1)
    class_2 = map(list, class_2)
    labels = list(labels)
    samples = class_1 + class_2
    problem = svmutil.svm_problem(labels, samples)
    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    # Load test data.
    with open(test_file) as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)

    class_1 = map(list, class_1)
    class_2 = map(list, class_2)
    labels = list(labels)

    if check:
        # Sadly, this prints to stdout too :-/
        svmutil.svm_predict(labels, class_1 + class_2,
                            model)  # Prints accuracy.

    if draw:

        def classify(x, y, model=model):
            return array(
                svmutil.svm_predict([0] * len(x), map(list, zip(x, y)),
                                    model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify,
            [1, -1])
        show()
Example #11
def calcFtrs(seq):
    ftrs = []

    stc, mfe, efe, cstc, cmfe, cdst, frq, div, bpp = FOLDER.fold(seq)

    kmer = kContent(seq, 1)
    atcg = (kmer['a'] + kmer['t']) / (kmer['a'] + kmer['t'] + kmer['c'] + kmer['g'])
    if kmer['a'] + kmer['t'] == 0:
        at = 0
    else:
        at = kmer['a'] / (kmer['a'] + kmer['t'])
    if kmer['c'] + kmer['g'] == 0:
        cg = 0
    else:
        cg = kmer['c'] / (kmer['c'] + kmer['g'])

    svmftrs = numpy.array((atcg, at, cg))

    mfeavg_mdl = svmutil.svm_load_model(mfeavg200)
    mfestd_mdl = svmutil.svm_load_model(mfestd200)
    efeavg_mdl = svmutil.svm_load_model(efeavg200)
    # efestd_mdl = svmutil.svm_load_model(efestd200)

    mfeavg_avg, mfeavg_std = loadRange(mfeavg200 + '.rng')
    mfestd_avg, mfestd_std = loadRange(mfestd200 + '.rng')
    efeavg_avg, efeavg_std = loadRange(efeavg200 + '.rng')
    # efestd_avg, efestd_std = loadRange(efestd200 + '.rng')

    mfe_avg = svmutil.svm_predict([0],
                                  [((svmftrs - mfeavg_avg) / mfeavg_std).tolist()], mfeavg_mdl, options='-b 1')[0][0]
    mfe_std = svmutil.svm_predict([0],
                                  [((svmftrs - mfestd_avg) / mfestd_std).tolist()], mfestd_mdl, options='-b 1')[0][0]
    efe_avg = svmutil.svm_predict([0],
                                  [((svmftrs - efeavg_avg) / efeavg_std).tolist()], efeavg_mdl, options='-b 1')[0][0]
    # efe_std = svmutil.svm_predict([0],
    # [((svmftrs - efestd_avg) / efestd_std).tolist()], efestd_mdl, options='-b 1')[0][0]

    ftrs.append(mfe - mfe_avg)
    ftrs.append((mfe - mfe_avg) / mfe_std)
    ftrs.append(efe - efe_avg)
    # ftrs.append((efe - efe_avg) / efe_std)

    return numpy.array(ftrs)
Example #12
def process_file(source, target):
    with open(source, 'rb') as i:
        reader = csv.reader(i)
        (before, chords) = list_spectrum_data(reader, components=60, allow_no_chord=True)
    sda_features = through_sda_layers(sda, before)
    # append chord labels as integer numbers
    sda_features = [x.tolist() for x in sda_features]
    (labels, acc, vals) = svmutil.svm_predict([0] * len(sda_features), sda_features, model)
    result = [chord_list[int(x)] for x in labels]
    with open(target, 'wb') as o:
        o.write(','.join(result))
Example #13
 def run(self, mode):
     if mode <= 2:
         model = self.Training('-t {} -b 1'.format(mode), self.Plot_Y,
                               self.Plot_X)
         sv = model.get_sv_indices()
         (label, _, _) = svmutil.svm_predict(self.Plot_Y, self.Plot_X,
                                             model)
         svmutil.svm_predict(self.Y_test, self.X_test, model)
         self.Visualization([i - 1 for i in sv], label)
     # elif mode==3:
     #     (C,gamma)=self.GridSearch()
     #     model=self.Training('-t 2 -c {} -g {}'.format(C,gamma),self.Plot_Y,self.Plot_X)
     #     sv=model.get_sv_indices()
     #     (label,_,_)=svmutil.svm_predict(self.Plot_Y,self.Plot_X,model)
     #     self.Visualization([i-1 for i in sv],label)
     elif mode == 3:
         data = self.UserDefineKernel(self.Plot_X, self.Plot_X, 0.1)
         model = self.Training('-t 4 -c {} -g {} -b 1'.format(0.01, 0.1),
                               self.Plot_Y, data, True)
         sv = model.get_sv_indices()
         (label, _, _) = svmutil.svm_predict(self.Plot_Y, data, model)
         self.Visualization([i - 1 for i in sv], label)
Example #14
def process(training_file, test_file, check, draw):
  # Load training data.
  with open(training_file) as f:
    class_1 = pickle.load(f)
    class_2 = pickle.load(f)
    labels = pickle.load(f)

  # Convert data to lists for libsvm.
  class_1 = map(list, class_1)
  class_2 = map(list, class_2)
  labels = list(labels)
  samples = class_1 + class_2
  problem = svmutil.svm_problem(labels, samples)
  # Don't print to stdout, use radial basis functions.
  param = svmutil.svm_parameter('-q -t 2')
  model = svmutil.svm_train(problem, param)

  # Load test data.
  with open(test_file) as f:
    class_1 = pickle.load(f)
    class_2 = pickle.load(f)
    labels = pickle.load(f)

  class_1 = map(list, class_1)
  class_2 = map(list, class_2)
  labels = list(labels)

  if check:
    # Sadly, this prints to stdout too :-/
    svmutil.svm_predict(labels, class_1 + class_2, model)  # Prints accuracy.

  if draw:
    def classify(x, y, model=model):
      return array(svmutil.svm_predict([0] * len(x), map(list, zip(x, y)),
                                       model)[0])
    imtools.plot_2d_boundary(
        [-6, 6, -6, 6], [array(class_1), array(class_2)], classify, [1, -1])
    show()
Example #15
def train_and_predict(x, y, kernel, option=''):
    kernel_type = {
        'linear': '0',
        'polynomial': '1',
        'rbf': '2',
        'sigmoid': '3',
        'self-defined': '4'
    }
    x_train, x_test = x
    y_train, y_test = y
    # -q suppresses libsvm's training output; keep a space before any extra option
    param = '-q -t ' + kernel_type[kernel] + ' ' + option
    m = svm_train(y_train, x_train, param)
    pred, pred_acc, pred_val = svm_predict(y_test, x_test, m)
Example #16
    def _calculate_score(self, scaled_feature):
        """Calculate score from scaled brisque feature.

        :param scaled_feature: Scaled brisque feature.
        :type scaled_feature: np.ndarray
        """
        x, idx = gen_svm_nodearray(
            scaled_feature.tolist(),
            # kernel_type is an int; compare against libsvm's PRECOMPUTED
            # constant (from libsvm.svm), not the string 'PRECOMPUTED'
            isKernel=(self._model.param.kernel_type == PRECOMPUTED)
        )
        nr_classifier = 1
        prob_estimates = (c_double * nr_classifier)()

        # low-level ctypes entry point; svmutil.svm_predict(y, x, model) is
        # the high-level wrapper with a different signature
        return svmutil.libsvm.svm_predict_probability(self._model, x,
                                                      prob_estimates)
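For comparison, a sketch of the same prediction through the high-level wrapper, assuming self._model carries probability information; the wrapper performs the node conversion itself:

labels, acc, vals = svmutil.svm_predict(
    [0], [scaled_feature.tolist()], self._model, '-b 1 -q')
score = labels[0]  # predicted score for the single sample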
Example #17
 def train(self, input_data_path, params="-t 0 -c 4 -b 1", is_eval=True):
     with commons.PhaseLogger("LIBSVM.train.read_problem"):
         Y, X = svmutil.svm_read_problem(input_data_path + "/Train.txt")
         prob = svmutil.svm_problem(Y, X)
         #Y, X = svmutil.svm_read_problem(input_data_path + "\\Train.txt")
     self._params = svmutil.svm_parameter(params)
     with commons.PhaseLogger("LIBSVM.train.svm_train"):
         self._model = svmutil.svm_train(prob, self._params)
     self._init = True
     if is_eval is True:
         p_labels, p_acc, p_vals = svmutil.svm_predict(Y, X, self._model)
         acc, mse, _ = p_acc
         logging.info("[%s]: train with Acc[%.4f] Mse[%.4f]" %
                      (self._get_class_name(), acc, mse))
Example #18
    def classify_set(self, featuresets):
        """
			Classifies the specified featuresets.

			The featuresets parameter must have the format [ [feature] ]
			Returns the most probable label of each item according to this classifier,
			where the returned value has the format [label]
		"""
        x = [self.__adapt_featureset(featureset) for featureset in featuresets]
        # create a fake labels array for the library
        y = [0] * len(x)
        p_labels = svm_predict(y, x, self.__model, "-q")[0]

        # convert the label's ids into the original form
        return [self.__labels[int(p_label_id)] for p_label_id in p_labels]
Example #19
	def classify_set(self, featuresets):
		"""
			Classifies the specified featuresets.

			The featuresets parameter must have the format [ [feature] ]
			Returns the most probable label of each item according to this classifier,
			where the returned value has the format [label]
		"""
		x = [self.__adapt_featureset(featureset) for featureset in featuresets]
		# create a fake labels array for the library
		y = [0]*len(x)
		p_labels = svm_predict(y, x, self.__model, "-q")[0]

		# convert the label's ids into the original form
		return [self.__labels[int(p_label_id)] for p_label_id in p_labels]
Example #20
def sliding_window_search(img, sbox_height, sbox_width, threshold):
	# note: `slide` and `svm` come from module-level globals in the source project
	detections = []
	height, width = img.shape

	for i in range(1, (height - sbox_height), slide):
		for j in range(1, (width - sbox_width), slide):
			img_patch = img[i:i+sbox_height-1, j:j+sbox_width-1]
			img_feat = extract(img_patch)
			vw_hist = calculate_visual_word(img_feat)

			plabel, acc, pr = svm_predict([0], [vw_hist], svm)
			#time.sleep(10000)
			
			if pr[0][0] > threshold:
				detections.append([i, i+sbox_height-1, j, j+sbox_width-1, pr[0][0]])
	return detections
Example #21
def eval(dat):
    ans = []
    for i in range(len(config.MODEL_LABEL) - 1):
        res = svmutil.svm_predict([], [dat], models[i], options="-q -b 1")
        #print(res[0][0])
        prob = res[2][0][0]
        #print(config.MODEL_LABEL[i], res)
        if res[0][0] > 0 and prob > 0.9:
            ans.append([i, prob, res])
        elif i == 2 and res[0][0] > 0 and prob > 0.6:
            ans.append([i, prob, res])
    ans.sort(key=lambda x: x[1])
    if ans:
        print(ans)
        return (ans[0][0], ans[0][1])
    else:
        return (-1, 0)
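Passing [] as the label list relies on newer libsvm behaviour (its Python README says to use [] when true labels are unavailable); on older releases a dummy list of matching length does the same. A sketch of both forms:

res = svmutil.svm_predict([], [dat], models[i], options="-q -b 1")   # newer libsvm
res = svmutil.svm_predict([0], [dat], models[i], options="-q -b 1")  # older releases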
Example #22
    def classify(self, data):
        len_tweets = len(data)
        results = {}

        neut_count = [0] * len_tweets
        pos_count = [0] * len_tweets
        neg_count = [0] * len_tweets

        orig_tweets = self.get_uniq_data(data)
        tweets = self.get_processed_tweets(orig_tweets)

        for i in tweets:
            tweet = tweets[i]
            test_tweets = []
            res = {}
            for words in tweet:
                words_filtered = [
                    e.lower() for e in words.split() if (common.is_ascii(e))
                ]
                test_tweets.append(words_filtered)
            test_feature_vector = helper.get_SVM_feature_vector(
                self.feature_list, test_tweets)
            p_labels, p_accs, p_vals = svm_predict(
                [0] * len(test_feature_vector), test_feature_vector,
                self.classifier)
            count = 0
            for t in tweet:
                label = p_labels[count]
                if (label == 0):
                    label = 'positive'
                    pos_count[i] += 1
                elif (label == 1):
                    label = 'negative'
                    neg_count[i] += 1
                elif (label == 2):
                    label = 'neutral'
                    neut_count[i] += 1
                result = {
                    'text': t,
                    'tweet': orig_tweets[i][count],
                    'label': label
                }
                res[count] = result
                count += 1
            results[i] = res
        return results
Example #23
def predict(tr_data_arr, tr_label_arr, pred_data_arr, pred_label_arr):
    data_arr = data_format(tr_data_arr)
    prob = svm_problem(tr_label_arr, data_arr)
    # the c and g parameters below were obtained via cross-validation
    param = svm_parameter('-c 2048.0 -g 0.001953125')
    svm_model = svm_train(prob, param)

    pred_data_arr = data_format(pred_data_arr)
    pred_data_len = len(pred_label_arr)
    wrong = 0
    for idx, data in enumerate(pred_data_arr):
        p_label, p_acc, p_val = svm_predict(
            [pred_label_arr[idx]], [data], svm_model)
        if int(p_label[0]) != int(pred_label_arr[idx]):
            wrong += 1

    accuracy = (pred_data_len - wrong) * 100.0 / pred_data_len
    return pred_data_len, wrong, accuracy
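The loop above calls svm_predict once per sample; libsvm also evaluates a whole batch in one call and reports accuracy itself, so an equivalent sketch with the same variables is:

p_label, p_acc, p_val = svm_predict(pred_label_arr, pred_data_arr, svm_model)
accuracy = p_acc[0]  # percentage correct over the whole batch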
Example #24
 def _predict(self, line):
     if not self.model:
         logging.info("model is None")
         return None 
     prob_y = []
     prob_x = []
     line = line.split(None, 1)
     # In case an instance with all zero features
     if len(line) == 1: line += ['']
     label, features = line
     xi = {}
     for e in features.split():
         #logging.info("e %s",e)
         ind, val = e.split(":")
         xi[int(ind)] = float(val)
     prob_y += [float(label)]
     prob_x += [xi]
     pred_labels, (ACC, MSE, SCC), pred_values = svm_predict(prob_y,prob_x,self.model)
     return pred_labels[0]
Example #25
File: cbir.py Project: chbrandt/bit
def svr_prediction(X_features,Y_classes,models=[],predict_options='-b 1'):
    """
    Predict the class members probability
    """
    
    Y_svfeatures = Y_classes.copy()
    
    Y_list,X_list = _convert_arrays2lists(Y_classes,X_features)
    Y_list = [0]*len(Y_list)
    
    labels = []
    accur = []
    vals = []
    for i_model in models:
    
        # predict model
        p_labels,p_accur,p_vals = svm_predict(Y_list,X_list,i_model,predict_options)
        
        labels.append(p_labels)
    
    return labels
Example #26
 def computeLayer(self, layer):
   """
   @param layer: the output HmaxLayer to store results in.
   """
   LevelFilter.computeLayer(layer)
   
   #need to enable learning mode from UI (after S2 trained)
   #during training, need to pass in class labels
   
   vec = layer.array[:,0,0].tolist()
   
   if self.isLearning:
     if len(self.learned)>=100 and self.svmModel==None:
       #model = svm_train(y, x [, 'training_options'])
       self.svmModel = svmutil.svm_train(self.classes, self.learned)
       return
     self.classes.append(0)
     self.learned.append(vec)
   elif self.svmModel!=None:
     #p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
      #x must be a list of samples, hence [vec]
      pLabs, pAcc, pVals = svmutil.svm_predict([0], [vec], self.svmModel)
     print "SVM Result: ", pLabs, pAcc, pVals
Example #27
def predict_emotion_paef():
    model = svmutil.svm_load_model("C:/Users/Admin/PycharmProjects/Emotion_Detection/trained_models/paef_models/artphoto_train.txt.model")
    mapping = {
        0 : "happy",
        2 : "fear",
        3 : "excitement",
        4 : "disgust",
        6 : "anger",
        7 : "sad"
    }

    with open('image_data.txt', 'r') as feature:
        for line in feature:
            line = line.strip()
            feature_vector = line.split()[1:]
            feature_vector = [float(item.split(":")[-1]) for item in feature_vector]

        # note: this call sits outside the for-loop, so it predicts on the
        # feature vector parsed from the last line of image_data.txt
        p_labs, p_acc, p_vals = svmutil.svm_predict([0], [feature_vector], model)
        print("p_labs")
        print(p_labs)
        lab = p_labs[0]
        return mapping[int(lab)]
Example #28
    def computeLayer(self, layer):
        """
    Override the computeLayer from LevelFilter in order to continue
    with processing the learning or inferring using our SVM model.
    If the SVM is trained and we are inferring, then the SVM inference
    results are stored in the layer (which is assumed to be a LayerC2).
    The layers are able to render themselves onto a wx canvas for 
    inspection.
    @param layer: the output HmaxLayer to store results in.
    """
        LevelFilter.computeLayer(self, layer)

        #need to enable learning mode from UI (after S2 trained)
        #during training, need to pass in class labels
        vec = layer.array[:, 0, 0].tolist()  #contains vector of C2 maxes

        if self.isLearning and self.__svmModel == None:
            #add to count for how many of this class have been learned
            count = self.__classCounts.get(self.learningClass, 0)
            self.__classCounts[self.learningClass] = count + 1

            #copy base input image to use as example when showing SVM result
            if count == 0:
                layer.saveExampleImage(self.learningClass)

            self.classes.append(self.learningClass)
            self.learned.append(vec)
            #print "learned svm ",len(self.learned),self.learningClass
        elif self.__svmModel != None:
            #p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
            pLabs, pAcc, pVals = svmutil.svm_predict([0], [vec],
                                                     self.__svmModel, "-b 1")
            pVals = pVals[0]
            #sort ids in case SVM classIDs not consecutive
            ids = sorted(self.__classCounts.keys())
            layer.setAccuracyResult(sorted(zip(pVals, ids), reverse=True))
            if HMAX.DEBUG:
                print "SVM Result: ", pLabs, pAcc, pVals
Example #29
 def computeLayer(self, layer):
   """
   Override the computeLayer from LevelFilter in order to continue
   with processing the learning or inferring using our SVM model.
   If the SVM is trained and we are inferring, then the SVM inference
   results are stored in the layer (which is assumed to be a LayerC2).
   The layers are able to render themselves onto a wx canvas for 
   inspection.
   @param layer: the output HmaxLayer to store results in.
   """
   LevelFilter.computeLayer(self, layer)
   
   #need to enable learning mode from UI (after S2 trained)
   #during training, need to pass in class labels
   vec = layer.array[:,0,0].tolist() #contains vector of C2 maxes
   
   if self.isLearning and self.__svmModel==None:
     #add to count for how many of this class have been learned
     count = self.__classCounts.get(self.learningClass, 0)
     self.__classCounts[self.learningClass] = count+1
     
     #copy base input image to use as example when showing SVM result
     if count==0:
       layer.saveExampleImage(self.learningClass)
     
     self.classes.append(self.learningClass)
     self.learned.append(vec)
     #print "learned svm ",len(self.learned),self.learningClass
   elif self.__svmModel!=None:
     #p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
     pLabs, pAcc, pVals = svmutil.svm_predict([0], [vec], self.__svmModel, "-b 1")
     pVals = pVals[0]
     #sort ids in case SVM classIDs not consecutive
     ids = sorted(self.__classCounts.keys())
     layer.setAccuracyResult(sorted(zip(pVals, ids),reverse=True))
     if HMAX.DEBUG:
       print "SVM Result: ", pLabs, pAcc, pVals
Example #30
def main(args):
    ref_path = args.ref_path

    height = args.height
    width = args.width

    ref_fps = args.ref_fps
    bit_depth = args.bit_depth
    if bit_depth == 8:
        pix_format = 'yuv420p'
    else:
        pix_format = 'yuv420p10le'

    fps = args.dist_fps  #frame rate of distorted sequence

    #Obtain pseudo reference video by frame dropping using ffmpeg
    cmd = 'ffmpeg -r '+ str(ref_fps) +' -pix_fmt ' + pix_format + ' -s ' + str(width) +\
    'x' + str(height) + ' -i '+ ref_path + ' -filter:v fps=fps=' +\
    str(fps) + ' pseudo_reference.yuv'
    os.system(cmd)

    GREED_feat = greed_feat(args)

    #load svm model
    model = svm_load_model('model_params/' + args.temp_filt + '.model')

    #load parameter of trained features
    feat_param = scipy.io.loadmat('model_params/' + args.temp_filt +
                                  '_params.mat')
    low = feat_param['low'][0, :]
    high = feat_param['high'][0, :]
    GREED_feat = (GREED_feat - low) / (high - low)

    #Predict score
    score, _, _ = svm_predict([0.0], GREED_feat[None, :], model, '-q')
    print(score)
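svm_predict always returns lists, so score above is a one-element list of predicted labels; a sketch of extracting the scalar:

predicted_score = score[0]  # one prediction per input row
print(predicted_score)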
Example #31
def run_kfold(param_dict, rows, numfold, kmers=[1,2,3]):
    """
    Run k KFold

    Args:
        param_dict: dictionary mapping param string to its value
        rows: input rows
        numfold: k for cross validation
        kmers: list of kmers, default [1,2,3]
    Return:
        dictionary of model performance (SCC, MSE) if benchmark is True, else
        return predictions for each fold
    """
    kf = KFold(numfold, shuffle=True)
    splitted = kf.split(rows)
    param_str = "-s 3 -b 1 -q " # epsilon-SVR, prob estimate true, quiet mode
    param_str += " ".join(["-{} {}".format(k,v) for k,v in param_dict.items()])
    params = svmutil.svm_parameter(param_str)

    foldidx = 1
    fold_results = []
    for train_idx, test_idx in splitted:
        train_list = [rows[i] for i in train_idx]
        test_list = [rows[i] for i in test_idx]

        y_train, x_train = libsvm_generate_matrix(train_list, kmers)
        y_test, x_test = libsvm_generate_matrix(test_list, kmers)

        train_prob  = svmutil.svm_problem(y_train, x_train)

        model = svmutil.svm_train(train_prob, params)
        #svmutil.svm_save_model('model_name.model', m)
        # y is only needed when we need the model performance
        svmpred = svmutil.svm_predict(y_test, x_test, model, options="-q")
        fold_results.append({"test":test_list, "svmpred":svmpred})
    return fold_results
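Each svmpred stored above is the full (labels, (ACC, MSE, SCC), values) triple; for the epsilon-SVR used here, per-fold performance lives in the MSE and SCC slots. A sketch of reading them back:

for fold in fold_results:
    p_labels, (acc, mse, scc), p_vals = fold["svmpred"]
    print("fold MSE=%.4f SCC=%.4f" % (mse, scc))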
Example #32
 def run(self, mode):
     '''
     0-Linear Kernel 1-Polynomial Kernel 2-RBF Kernel 3-Best parameter RBF kernel 4-User define kernel
     '''
     if mode <= 2:
         model = self.Training('-t {} -b 1'.format(mode), self.Y_train,
                               self.X_train)
         svmutil.svm_predict(self.Y_test, self.X_test, model)
     elif mode == 3:
         (C, gamma) = self.GridSearch()
         model = self.Training('-t 2 -c {} -g {}'.format(C, gamma),
                               self.Y_train, self.X_train)
         svmutil.svm_predict(self.Y_test, self.X_test, model)
     elif mode == 4:
         data = self.UserDefineKernel(self.X_train, self.X_train, 0.01)
         print('{} {}'.format(len(data), len(self.Y_train)))
         model = self.Training('-t 4 -c {} -g {} -b 1'.format(1000, 0.01),
                               self.Y_train, data, True)
         test = self.UserDefineKernel(self.X_test, self.X_train, 0.01)
         svmutil.svm_predict(self.Y_test, test, model)
Example #33
f.close()

train_data = train_true[::]
train_data.extend(train_fake)
tmp = list()
for li in train_data:
    tmp.append(dict(enumerate(li)))
train_data = tmp
train_label = [1 for i in range(len(train_true))
               ] + [-1 for i in range(len(train_fake))]

eva_data = eva_true[::]
eva_data.extend(eva_fake)
tmp = list()
for li in eva_data:
    tmp.append(dict(enumerate(li)))
eva_data = tmp
eva_label = [1 for i in range(len(eva_true))
             ] + [-1 for i in range(len(eva_fake))]

model = svmutil.svm_train([1 for i in range(len(train_true))] +
                          [-1 for i in range(len(train_fake))], train_data,
                          '-c 0.03125 -g 0.25')
print type(model)
p_label, p_acc, p_val = svmutil.svm_predict([1 for i in range(len(eva_true))] +
                                            [-1 for i in range(len(eva_fake))],
                                            eva_data, model)

print p_acc
Example #34
def main():
    m = svmutil.svm_load_model('trained_models/svm.model')
    relation_tag = {0: 'None', 1: 'has_value', 2: 'has_temp', 3: 'modified_by'}

    match = re.search('^(.*)\.txt', sys.argv[2])
    filename = sys.argv[2]
    if match:
        filename = match.group(1)

    input_dir = sys.argv[1] + '/' + filename + '_NER.xml'
    output_dir = sys.argv[1] + '/' + filename + '_Parsed.xml'
    print "Reading NER results from ", input_dir

    tree = ET.ElementTree(file=input_dir)

    root = tree.getroot()
    relations = {}
    index = []
    for child in root:

        syn_features = codecs.open('Tempfile/relation_scale', 'w')
        temp_pairs = relation_features.generate_pairs(child, syn_features)
        if temp_pairs:
            try:
                y, x = svmutil.svm_read_problem('Tempfile/relation_scale')
                p_label, p_acc, p_val = svmutil.svm_predict(y, x, m)
            except ValueError:
                for child2 in child.findall('text'):
                    print child2.text
                continue

        #print len(p_label),len(temp_pairs)
        else:
            p_label = []
            temp_pairs = []
        for j in range(0, len(p_label)):
            #print j
            relations[temp_pairs[j]] = p_label[j]

            indexes = temp_pairs[j].split("_")
            index.append(indexes[0])
            index.append(indexes[1])

        for child2 in child.findall('entity'):
            node_index = child2.attrib['index']
            child2.attrib['relation'] = 'None'

            if node_index in index:

                right_pattern = '^(\w+)_' + node_index
                left_pattern = node_index + '_(\w+)$'
                for relation in relations:
                    match1 = re.search(left_pattern, relation)
                    match2 = re.search(right_pattern, relation)
                    other_index = None
                    if match1:
                        other_index = match1.group(1)
                    else:
                        if match2:
                            other_index = match2.group(1)
                        else:
                            continue
                    relation_type = relation_tag[relations[relation]]
                    if relation_type == 'None':
                        continue

                    # '==', not 'is': identity comparison on strings is unreliable
                    if child2.attrib['relation'] == 'None':
                        child2.attrib[
                            'relation'] = other_index + ":" + relation_type
                    else:
                        child2.attrib['relation'] = child2.attrib[
                            'relation'] + "|" + other_index + ":" + relation_type
            #print child2.text,child2.attrib['index'],child2.attrib['relation']

    relation_executed = os.path.exists("in.parse")
    if relation_executed:
        os.system('rm in.parse')
    os.system('rm Tempfile/relation_scale')
    print "Writing Relation xml to ", output_dir
    new_tree = codecs.open(output_dir, 'w')
    tree.write(new_tree)
    print "Finished!"
Example #35
def get_dataset(fname,k):
	labels = []
	features = []
	for prot_id,seq in seq2feature.parse_fasta(fname):
		labels.append(prot_id)
		features.append(k_spec(seq,k))
	return labels,features

if __name__ == "__main__":
	parser = argparse.ArgumentParser(description='Predict X binding proteins.')
	
	parser.add_argument('-model',action="store",dest="model")
	parser.add_argument('-thr',action="store",dest="thr",type = float)
	parser.add_argument('-fname',action="store",dest="fname")
	model = parser.parse_args().model
	fname = parser.parse_args().fname
	thr = parser.parse_args().thr
	
	labels,features = get_dataset(fname,2)
	model = svmutil.svm_load_model(model)

	plbl, pacc, pvals = svmutil.svm_predict([0]*len(features),features,model,"")

	for cnt,(prot_id,seq) in enumerate(seq2feature.parse_fasta(fname)):
		pval = pvals[cnt][0]
		if pval >= thr:
			print "> %s:%f" % (prot_id,pval)
			print seq
	
Example #36
    sum1 = sum1/len(headlines[h])
#     print(sum1)
    sum1 = np.concatenate((sum1,cat_vec[combine_f["Category"][h]],cat_vec[combine_f["Post_Type"][h]], arrays[h]))
    head_vec.append(sum1)

head_vec = np.array(head_vec)
feat_len = int(0.8*len(head_vec))
train = head_vec[:feat_len]
res_train = combine_f["class"][:feat_len].tolist()
test = head_vec[feat_len:]
res_test = combine_f["class"][feat_len:].tolist()
print(len(test), len(res_test))

model_svm = svmutil.svm_train(res_train, train,'-t 2 -c 2')

y = svmutil.svm_predict(res_test, test, model_svm)

confusion_matrix(res_test, y[0], labels=[0,1])

tn, fp, fn, tp = confusion_matrix(res_test, y[0], labels=[0,1]).ravel()

recall = tp/(tp+fn)
recall

precision = tp/(tp+fp)
precision

f1 = precision*recall*2/(precision+recall)
f1

fpr, tpr, thresholds = roc_curve(res_test, y[0])
Example #37
prob = svm_problem(y_train,X_train_pca)
param = svm_parameter("-q")
param.kernel_type = 2  # RBF; svm_parameter exposes kernel_type (an int), not 'kernel'
#param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
 #             'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
param.C=32
param.gamma=0.0001
print("Pass")
#parameters = GridSearchCV( param_grid)
model= svm_train(prob,param)
#clf = clf.fit(X_train_pca, y_train)

#print("Best estimator found by grid search:")
#print(m.best_estimator_)
y_pred, pred_acc, pred_val = svm_predict(y_test,X_test_pca,model)

################################################################################
## Quantitative evaluation of the model quality on the test set


print(classification_report(y_test, y_pred, target_names=target_names))
print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))


print("Predicting people's names on the test set")
################################################################################
## Qualitative evaluation of the predictions using matplotlib
#
def plot_gallery(images, titles, h, w, n_row=4, n_col=8):
    """Helper function to plot a gallery of portraits"""
Example #38
 def classify(x, y, model=model):
     return array(
         svmutil.svm_predict([0] * len(x), map(list, zip(x, y)),
                             model)[0])
Example #39
 def classify(x, y, model=model):
   return array(svmutil.svm_predict([0] * len(x), map(list, zip(x, y)),
                                    model)[0])
Example #40
def detectcontact(photolist,
                  n,
                  savesize=20,
                  delsize=15,
                  thresholds=[9, 0.75, 6],
                  historysize=10,
                  blocksize=10,
                  Npatches=20):
    """
    photolist = list of photoitems (these are in the files saved by the tracking system).
    n = index from this list to compute the locations for.
    savesize = controls the size of the patch that is saved into the 'contact' object.
    delsize = controls the size of the patch that is deleted from the search image around a maximum.
    thresholds = thresholds for 'non-ML' decision boundary for if a maximum is a reflector
    historysize = how far back through the list to go, when computing
    blocksize = how much to dilate the current no-flash image compared to the current flash image
    
    TODO Fix Bug: The code relies on the savesize = 20, as that places the peak at 20,20 in the patch.
    
    Returns:
    contact = This is a list of dictionaries, each associated with a candidate peak in the search image, with these fields:
                 x and y - position of this maximum [ESSENTIAL]
                 patch - the current flash photo minus the current no-flash photo
                 searchpatch - the difference between current pairs and previous pairs of photos (variously dilated)
                               which is searched for its maximum values.
                 mean, searchmax, centremax - various features.
                 confident - a boolean measure of whether the system thinks this is the dot
                 prediction - a real value reporting confidence in being a true retroreflector (NEGATIVE=More likely).
                               the current system works well with a threshold of zero. [ESSENTIAL]
                 
    found = whether a confident dot has been found.
    searchimg = more for debugging, the search image used for finding maxima.
    
    Npatches = number of patches to consider (each patch is centred on a maximum)
    
    
    """
    from time import time
    unsortedsets = []
    startn = n - historysize
    if startn < 0: startn = 0
    for i in range(startn, n + 1):
        #photoitem = q.read(i)
        photoitem = photolist[i]
        if photoitem is None: continue
        if photoitem['img'] is None: continue
        assert not isinstance(
            photoitem['img'][0, 0],
            numbers.Integral), "Need image array to be float not integers."
        if 'mean' not in photoitem:
            photoitem['mean'] = np.mean(photoitem['img'][::5, ::5])
        #photoitem['img'] = photoitem['img'].astype(np.float) #already done
        tt = photoitem['record']['triggertime']
        chosenset = None
        for s in unsortedsets:
            if np.abs(
                    tt -
                    np.mean([photoi['record']['triggertime']
                             for photoi in s])) < 0.5:
                chosenset = s
        if chosenset is None:
            unsortedsets.append([photoitem])
        else:
            chosenset.append(photoitem)

    starttime = time()
    sets = []
    for s in unsortedsets:
        if len(s) < 2:  #if the set only has one photo in, skip.
            continue
        newset = {'flash': [], 'noflash': []}
        setmean = np.mean([
            photoitem['mean'] for photoitem in s
            if photoitem['img'] is not None
        ])
        for photoitem in s:
            if photoitem['img'] is not None:
                if photoitem['mean'] > setmean + 0.1:
                    newset['flash'].append(photoitem)
                else:
                    newset['noflash'].append(photoitem)
        if len(newset['flash']) == 0:
            continue  #no point including sets without a flash
        sets.append(newset)

    starttime = time()
    last_diff = None
    this_diff = None
    if len(sets) < 2:
        print("Fewer than two photo sets available")
        return None, False, None  #we can't do this if we only have one photo set
    for i, s in enumerate(sets):
        this_set = i == len(
            sets
        ) - 1  #whether the set is the one that we're looking for the bee in.
        for s_nf in s['noflash']:
            if this_set:
                intertime = time()
                diff = detect(s['flash'][0]['img'],
                              s_nf['img'],
                              blocksize=blocksize
                              )  #for the current search image we dilate
                if this_diff is None:
                    this_diff = diff
                else:
                    this_diff = np.minimum(diff, this_diff)
            else:
                intertime = time()
                if 'nodilationdiff' in s_nf:
                    diff = s_nf['nodilationdiff']
                else:
                    diff = detect(s['flash'][0]['img'],
                                  s_nf['img'],
                                  dilate=None)  #for the past ones we don't
                    if diff is not None:
                        s_nf['nodilationdiff'] = diff
                if last_diff is None:
                    last_diff = diff
                else:
                    last_diff = np.maximum(
                        diff, last_diff)  #TODO: Need to align to other sets

    if (last_diff is None) or (this_diff is None):
        print("Insufficient data")
        return None, False, None

    starttime = time()
    #if there are large changes in the image the chances are the camera's moved... remove those sets before then
    keepafter = 0
    for i in range(len(sets) - 1):
        if np.mean(
                np.abs(sets[i]['noflash'][0]['img'][::5, ::5] -
                       sets[-1]['noflash'][0]['img'][::5, ::5])) > 3:
            keepafter = i
    sets = sets[keepafter:]
    #    #we just align to the first of the old sets.
    imgcorrection = 20
    #    shift = ensemblegetshift(sets[-1]['noflash'][0]['img'],sets[0]['noflash'][0]['img'],searchbox=imgcorrection,step=2,searchblocksize=50,ensemblesizesqrt=3)
    #    #res = alignandsubtract(last_diff,shift,this_diff,margin=10)

    res = detect(this_diff,
                 last_diff,
                 blocksize=10,
                 offset=3,
                 searchbox=imgcorrection)

    #get simple image difference to save as patch.
    img = sets[-1]['flash'][0]['img'] - sets[-1]['noflash'][0]['img']
    searchimg = res.copy()
    contact = []
    found = False
    for i in range(Npatches):
        y, x = np.unravel_index(searchimg.argmax(), searchimg.shape)
        searchmax = searchimg[y, x]

        #if (x<savesize) or (y<savesize) or (x>searchimg.shape[1]-savesize-1) or (y>searchimg.shape[0]-savesize-1): continue
        #target = 1*(((y-truey+alignmentcorrection)**2 + (x-truex+alignmentcorrection)**2)<10**2)
        #print(x,truex,y,truey)
        patch = img[y - savesize + imgcorrection:y + savesize + imgcorrection,
                    x - savesize + imgcorrection:x + savesize +
                    imgcorrection].astype(np.float32)
        searchpatch = searchimg[y - savesize:y + savesize,
                                x - savesize:x + savesize].astype(np.float32)
        searchimg[max(0, y - delsize):min(searchimg.shape[0], y + delsize),
                  max(0, x - delsize):min(searchimg.shape[1], x + delsize)] = 0

        patimg = patch.copy()
        centreimg = patimg[17:24, 17:24].copy()
        patimg[37:44, 37:44] = 0

        centremax = np.max(centreimg.flatten())
        mean = np.mean(patimg.flatten())
        #Possible contact
        if (searchmax > thresholds[0]) & (mean < thresholds[1]) & (
                centremax > thresholds[2]):
            confident = True
        else:
            confident = False
        if confident: found = True

        if model is not None:  # `model` is a module-level global in the source project
            outersurround = max(patch[16, 20], patch[20, 16], patch[24, 20],
                                patch[20, 24], patch[16, 16], patch[16, 24],
                                patch[24, 16], patch[24, 24])
            innersurround = max(patch[18, 20], patch[20, 18], patch[22, 20],
                                patch[20, 22], patch[18, 18], patch[18, 22],
                                patch[22, 18], patch[22, 22])
            centre = np.sum([
                patch[20, 20], patch[20, 21], patch[20, 19], patch[19, 20],
                patch[21, 20]
            ])
            res = np.array([[
                searchmax, centremax, mean, outersurround, innersurround,
                centre
            ]])
            _, _, pred = svm_predict([], res, model, '-q')
        else:
            pred = None
        contact.append({
            'x': x + imgcorrection,
            'y': y + imgcorrection,
            'patch': patch,
            'searchpatch': searchpatch,
            'mean': mean,
            'searchmax': searchmax,
            'centremax': centremax,
            'confident': confident,
            'prediction': pred[0][0]
        })
    return contact, found, searchimg
Example #41
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'Bayes Accuracy:', acc
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
# enough. Probably some float underflow due to things not using log
# probabilities?

# Test SVM.
features = map(list, features)
test_features = map(list, test_features)

str_int_map = {}  # libSVM needs int labels.
for i, c in enumerate(classnames):
    str_int_map[c], str_int_map[i] = i, c


def convert_labels(labels, str_int_map):
    return [str_int_map[l] for l in labels]


problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
res = svmutil.svm_predict(convert_labels(test_labels, str_int_map),
                          test_features, model)[0]
res = convert_labels(res, str_int_map)
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'SVM Accuracy:', acc
print_confusion(res, test_labels, classnames)
Example #42
 def predict_label( self, feature ):
     options = self.get_predict_options()
     y = [-1]
     (p_labels, p_acc, p_vals) = svm_predict( y, [feature], self._model, options )
     return p_labels[0]
Example #43
vername = os.path.join(SUDOKU_PATH, 'sudoku18.sud')

im = numpy.array(Image.open(imname).convert('L'))

x = sudoku.find_sudoku_edges(im, axis=0)
y = sudoku.find_sudoku_edges(im, axis=1)
tic.k('found edges')


# Extract cells, run OCR.
OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'
features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
problem = svmutil.svm_problem(labels, map(list, features))
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
tic.k('built OCR model')

crops = []
for col in range(9):
  for row in range(9):
    crop = im[y[col]:y[col + 1], x[row]:x[row + 1]]
    crops.append(ocr.compute_feature(crop))
tic.k('extracted cells')

res = svmutil.svm_predict(numpy.loadtxt(vername), map(list, crops), model)[0]
tic.k('recognized cells')

res = numpy.array(res).reshape(9, 9)
print 'Recognized board:'
print res
Example #44
res = bc.classify(test_features)[0]
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'Bayes Accuracy:', acc
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
# enough. Probably some float underflow due to things not using log
# probabilities?


# Test SVM.
features = map(list, features)
test_features = map(list, test_features)

str_int_map = {}  # libSVM needs int labels.
for i, c in enumerate(classnames):
  str_int_map[c], str_int_map[i] = i, c

def convert_labels(labels, str_int_map):
  return [str_int_map[l] for l in labels]

problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
res = svmutil.svm_predict(
    convert_labels(test_labels, str_int_map), test_features, model)[0]
res = convert_labels(res, str_int_map)
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print 'SVM Accuracy:', acc
print_confusion(res, test_labels, classnames)
Example #45
def main():
    m=svmutil.svm_load_model('trained_models/svm.model')
    relation_tag={0:'None',1:'has_value',2:'has_temp',3:'modified_by'}

    match=re.search('^(.*)\.txt',sys.argv[2])
    filename=sys.argv[2]
    if match:
        filename=match.group(1)

    input_dir=sys.argv[1]+'/'+filename+'_NER.xml'
    output_dir=sys.argv[1]+'/'+filename+'_Parsed.xml'
    print "Reading NER results from ", input_dir

    tree = ET.ElementTree(file=input_dir)

    root = tree.getroot()
    relations={}
    index=[]
    for child in root:

        syn_features=codecs.open('Tempfile/relation_scale','w')
        temp_pairs=relation_features.generate_pairs(child,syn_features)
        if temp_pairs:
            try:
                y,x=svmutil.svm_read_problem('Tempfile/relation_scale')
                p_label,p_acc,p_val=svmutil.svm_predict(y,x,m)
            except ValueError:
                for child2 in child.findall('text'):
                    print child2.text
                continue

        #print len(p_label),len(temp_pairs)
        else:
            p_label=[]
            temp_pairs=[]
        for j in range(0,len(p_label)):
            #print j
            relations[temp_pairs[j]]=p_label[j]

            indexes=temp_pairs[j].split("_")
            index.append(indexes[0])
            index.append(indexes[1])

        for child2 in child.findall('entity'):
            node_index=child2.attrib['index']
            child2.attrib['relation']='None'

            if node_index in index:

                right_pattern='^(\w+)_'+node_index
                left_pattern=node_index+'_(\w+)$'
                for relation in relations:
                    match1=re.search(left_pattern,relation)
                    match2=re.search(right_pattern,relation)
                    other_index= None
                    if match1:
                        other_index=match1.group(1)
                    else:
                        if match2:
                            other_index=match2.group(1)
                        else:
                            continue
                    relation_type=relation_tag[relations[relation]]
                    if relation_type == 'None':
                        continue

                    # '==', not 'is': identity comparison on strings is unreliable
                    if child2.attrib['relation'] == 'None':
                        child2.attrib['relation']=other_index+":"+relation_type
                    else:
                        child2.attrib['relation']=child2.attrib['relation']+"|"+other_index+":"+relation_type
            #print child2.text,child2.attrib['index'],child2.attrib['relation']

    relation_executed=os.path.exists("in.parse")
    if relation_executed:
        os.system('rm in.parse')
    os.system('rm Tempfile/relation_scale')
    print "Writing Relation xml to ", output_dir
    new_tree=codecs.open(output_dir,'w')
    tree.write(new_tree)
    print "Finished!"
Example #46
 def _prob(self, x):
     y = [0] * len(x)
     p_labs, p_acc, p_vals = libsvmutil.svm_predict(y, x.tolist(),
                                                    self.model, "-b 1")
     labels = self.model.get_labels()
     return [dict(list(zip(labels, ps))) for ps in p_vals]
Example #47
import os

from libsvm import svmutil

import ocr

OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'
features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
test_features, test_labels = \
    ocr.load_ocr_data(os.path.join(OCR_PATH, 'testing'))

features = map(list, features)
test_features = map(list, test_features)


problem = svmutil.svm_problem(labels, features)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)

print 'Training data fit:'
svmutil.svm_predict(labels, features, model)
print 'Testing data fit:'
svmutil.svm_predict(test_labels, test_features, model)
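Both svm_predict calls print their accuracy summary to stdout; passing '-q' in the options string keeps evaluation quiet, as in this sketch:

p_label, p_acc, p_val = svmutil.svm_predict(test_labels, test_features, model, '-q')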