def _classify_arrays(self, array_iter): if not self.has_model(): raise RuntimeError("No SVM model present for classification") # Dump descriptors into a matrix for normalization and use in # prediction. vec_mat = numpy.array(list(array_iter)) vec_mat = self._norm_vector(vec_mat) all_label_list = self.get_labels() svm_label_map = self.svm_label_map c_base = dict((l, 0.) for l in all_label_list) # Effectively reproducing the body of svmutil.svm_predict in order to # simplify and get around excessive prints svm_type = self.svm_model.get_svm_type() nr_class = self.svm_model.get_nr_class() # Model internal labels. Parallel to ``prob_estimates`` array. svm_model_labels = self.svm_model.get_labels() # TODO: Normalize input arrays in batch(es). TEST if current norm # function can just take a matrix? if self.svm_model.is_probability_model(): if svm_type in [svm.NU_SVR, svm.EPSILON_SVR]: nr_class = 0 prob_estimates = (ctypes.c_double * nr_class)() for v in vec_mat: # normalize vector v, idx = svm.gen_svm_nodearray(v.tolist()) svm.libsvm.svm_predict_probability(self.svm_model, v, prob_estimates) c = dict(c_base) # Shallow copy c.update({svm_label_map[l]: p for l, p in zip(svm_model_labels, prob_estimates[:nr_class])}) yield c else: # noinspection PyUnresolvedReferences if svm_type in (svm.ONE_CLASS, svm.EPSILON_SVR, svm.NU_SVC): nr_classifier = 1 else: nr_classifier = nr_class * (nr_class - 1) // 2 # noinspection PyCallingNonCallable,PyTypeChecker dec_values = (ctypes.c_double * nr_classifier)() for v in vec_mat: # normalize vector v, idx = svm.gen_svm_nodearray(v.tolist()) label = svm.libsvm.svm_predict_values(self.svm_model, v, dec_values) c = dict(c_base) # Shallow copy c[svm_label_map[label]] = 1. yield c
def single_label(v):
    """
    Predict the label for one vector and return its classification map.

    Relies on ``nr_classifier``, ``c_base``, ``svm_label_map`` and ``self``
    from the enclosing scope.

    :param v: Vector exposing ``tolist()``.
    :return: Copy of ``c_base`` with the predicted label's entry set to 1.
    """
    # Fresh output buffer for the decision values of this call.
    decision_buffer = (ctypes.c_double * nr_classifier)()
    node_array = svm.gen_svm_nodearray(v.tolist())[0]
    predicted = svm.libsvm.svm_predict_values(self.svm_model, node_array,
                                              decision_buffer)
    result = dict(c_base)  # Shallow copy
    result[svm_label_map[predicted]] = 1.
    return result
def _classify(self, d): """ Internal method that defines the generation of the classification map for a given DescriptorElement. This returns a dictionary mapping integer labels to a floating point value. :param d: DescriptorElement containing the vector to classify. :type d: smqtk.representation.DescriptorElement :raises RuntimeError: Could not perform classification for some reason (see message). :return: Dictionary mapping trained labels to classification confidence values :rtype: dict[collections.Hashable, float] """ if not self.has_model(): raise RuntimeError("No SVM model present for classification") # Get and normalize vector v = d.vector().astype(float) v = self._norm_vector(v) v, idx = svm.gen_svm_nodearray(v.tolist()) # Effectively reproducing the body of svmutil.svm_predict in order to # simplify and get around excessive prints svm_type = self.svm_model.get_svm_type() nr_class = self.svm_model.get_nr_class() c = dict((l, 0.) for l in self.get_labels()) if self.svm_model.is_probability_model(): # noinspection PyUnresolvedReferences if svm_type in [svm.NU_SVR, svm.EPSILON_SVR]: nr_class = 0 # noinspection PyCallingNonCallable prob_estimates = (ctypes.c_double * nr_class)() svm.libsvm.svm_predict_probability(self.svm_model, v, prob_estimates) # Update dict for l, p in zip(self.svm_model.get_labels(), prob_estimates[:nr_class]): c[self.svm_label_map[l]] = p else: # noinspection PyUnresolvedReferences if svm_type in (svm.ONE_CLASS, svm.EPSILON_SVR, svm.NU_SVC): nr_classifier = 1 else: nr_classifier = nr_class * (nr_class - 1) // 2 # noinspection PyCallingNonCallable dec_values = (ctypes.c_double * nr_classifier)() label = svm.libsvm.svm_predict_values(self.svm_model, v, dec_values) # Update dict c[self.svm_label_map[label]] = 1. assert len(c) == len(self.svm_label_map) return c
def single_pred(v):
    """
    Predict per-label probabilities for one vector.

    Relies on ``nr_class``, ``c_base``, ``svm_label_map``,
    ``svm_model_labels`` and ``self`` from the enclosing scope.

    :param v: Vector exposing ``tolist()``.
    :return: Copy of ``c_base`` updated with the probability estimate for
        each model label.
    """
    # Fresh output buffer for the probability estimates of this call.
    estimates = (ctypes.c_double * nr_class)()
    node_array = svm.gen_svm_nodearray(v.tolist())[0]
    svm.libsvm.svm_predict_probability(self.svm_model, node_array, estimates)
    result = dict(c_base)  # Shallow copy
    for model_label, probability in zip(svm_model_labels,
                                        estimates[:nr_class]):
        result[svm_label_map[model_label]] = probability
    return result
def predict_ion(chem_env, scatter_env, elements=None, svm_name=None):
    """
    Uses the trained classifier to predict the ions that most likely fit a
    given list of features about the site.

    Parameters
    ----------
    chem_env : mmtbx.ions.environment.ChemicalEnvironment
        A object containing information about the chemical environment at a
        site.
    scatter_env : mmtbx.ions.environment.ScatteringEnvironment
        An object containing information about the scattering environment at
        a site.
    elements : list of str, optional
        A list of elements to include within the prediction. Must be a subset
        of mmtbx.ions.svm.ALLOWED_IONS. Note: Water is not added to elements
        by default.
    svm_name : str, optional
        The SVM to use for prediction. By default, the SVM trained on heavy
        atoms and calcium in the presence of anomalous data is used

    Returns
    -------
    list of tuple of str, float or None
        A list of classes and the predicted probabilities associated with
        each, sorted by descending probability, or None if the trained
        classifier cannot be loaded.

    Raises
    ------
    Sorry
        If ``elements`` contains an entry that is not in ALLOWED_IONS.
    """
    # Load the classifier and the parameters used to interact with it
    classifier, vector_options, scaling, features = _get_classifier(svm_name)

    if classifier is None or vector_options is None:
        return None

    # Convert our data into a format that libsvm will accept
    vector = ion_vector(chem_env, scatter_env, **vector_options)
    vector = utils.scale_to([vector], scaling[0], scaling[1])[0]

    assert len(vector) == len(features)
    vector = vector[features]

    xi = svm.gen_svm_nodearray(
        list(vector),
        isKernel=classifier.param.kernel_type == svm.PRECOMPUTED,
    )[0]

    nr_class = classifier.get_nr_class()
    # prob_estimates isn't actually read by svm_predict_probability, it is
    # only written to with the final estimates. We just need to allocate
    # space for it.
    prob_estimates = (c_double * nr_class)()
    svm.libsvm.svm_predict_probability(classifier, xi, prob_estimates)
    probs = prob_estimates[:nr_class]
    labels = [ALLOWED_IONS[i] for i in classifier.get_labels()]

    # sorted() rather than zip(...).sort(): zip returns an iterator without
    # a sort method on Python 3, while sorted() behaves the same on both.
    lst = sorted(zip(labels, probs), key=lambda x: -x[-1])

    if elements is not None:
        for element in elements:
            if element not in ALLOWED_IONS:
                raise Sorry("Unsupported element '{}'".format(element))

        # Filter out elements the caller does not care about
        classes, probs = [], []
        for element, prob in lst:
            if element in elements:
                classes.append(element)
                probs.append(prob)

        # Re-normalize the probabilities. Skipped when the retained
        # probabilities sum to zero, which would otherwise divide by zero.
        total = sum(probs)
        if total > 0:
            probs = [i / total for i in probs]
        lst = list(zip(classes, probs))

    return lst
state = 'single' probs = (svm.c_double*2)(0,0) pA = 1. pB = 1. maxamp = 0. sigs_now = [] #H = np.zeros(256) print('-') counter = 0 tam = A.shape[0] for i in xrange(tam): if i % 10000 == 0: sys.stdout.write('\rA:\t%d\t%d\t%f\t%d\t%f'%(i, tam, i / tam, counter, counter/(i+1))) sys.stdout.flush() x0, max_idx = svm.gen_svm_nodearray(A[i].tolist()) c = svm.libsvm.svm_predict_probability(model, x0, probs) off, ch, sig_now = sig.next() M = abs(sig_now).max() pA *= probs[0] ** M pB *= probs[1] ** M maxamp += M sigs_now.append(sig_now) #H += np.abs(sg.hilbert(sig_now)) if (i+1) % 11 == 0: pA = pA ** (1. / maxamp) pB = pB ** (1. / maxamp) fig = plt.figure(1,figsize=(16,16)) timer = fig.canvas.new_timer(interval=2000) def close_event():
probs = (svm.c_double * 2)(0, 0) pA = 1. pB = 1. maxamp = 0. sigs_now = [] #H = np.zeros(256) print('-') counter = 0 tam = A.shape[0] for i in xrange(tam): if i % 10000 == 0: sys.stdout.write('\rA:\t%d\t%d\t%f\t%d\t%f' % (i, tam, i / tam, counter, counter / (i + 1))) sys.stdout.flush() x0, max_idx = svm.gen_svm_nodearray(A[i].tolist()) c = svm.libsvm.svm_predict_probability(model, x0, probs) off, ch, sig_now = sig.next() M = abs(sig_now).max() pA *= probs[0]**M pB *= probs[1]**M maxamp += M sigs_now.append(sig_now) #H += np.abs(sg.hilbert(sig_now)) if (i + 1) % 11 == 0: pA = pA**(1. / maxamp) pB = pB**(1. / maxamp) fig = plt.figure(1, figsize=(16, 16)) timer = fig.canvas.new_timer(interval=2000)
# Build the libsvm problem from the training dataset: integer targets and
# plain-list feature vectors.
prob = svm_problem([int(t) for t in TrainDS['target']],
                   [list(i) for i in TrainDS['input']])
param = svm_parameter()
# option: -t 0: linear kernel. Best for classification.
# option: -c 0.01: regularization parameter. smaller is more regularization
# see below for all options
param.parse_options('-t 0 -c 0.01')
# Single-argument print(...) calls behave identically on Python 2 and 3.
print("Training svm...")
model = libsvm.svm_train(prob, param)

print("Testing svm with three random inputs")
from random import randrange
for _ in range(3):
    i = randrange(0, len(TestDS))
    # again some conversion needed because of low level interface
    x0, m_idx = gen_svm_nodearray(list(TestDS['input'][i]))
    prediction = libsvm.svm_predict(model, x0)
    print("Target:{0}, prediction:{1}".format(TestDS['target'][i], prediction))

# test svm over test dataset
correct = 0
for j in range(len(TestDS)):
    # again some conversion needed because of low level interface
    x0, m_idx = gen_svm_nodearray(list(TestDS['input'][j]))
    prediction = libsvm.svm_predict(model, x0)
    if int(prediction) == int(TestDS['target'][j]):
        correct += 1
print("Accuracy on test set is {0}%".format(correct * 100.0 / len(TestDS)))
###possible parameters###
# Build the libsvm problem from the training dataset: integer targets and
# plain-list feature vectors.
prob = svm_problem([int(t) for t in TrainDS['target']],
                   [list(i) for i in TrainDS['input']])
param = svm_parameter()
# option: -t 0: linear kernel. Best for classification.
# option: -c 0.01: regularization parameter. smaller is more regularization
# see below for all options
param.parse_options('-t 0 -c 0.01')
# Single-argument print(...) calls behave identically on Python 2 and 3.
print("Training svm...")
model = libsvm.svm_train(prob, param)

print("Testing svm with three random inputs")
from random import randrange
for _ in range(3):
    i = randrange(0, len(TestDS))
    # again some conversion needed because of low level interface
    x0, m_idx = gen_svm_nodearray(list(TestDS['input'][i]))
    prediction = libsvm.svm_predict(model, x0)
    print("Target:{0}, prediction:{1}".format(TestDS['target'][i], prediction))

# test svm over test dataset
correct = 0
for j in range(len(TestDS)):
    # again some conversion needed because of low level interface
    x0, m_idx = gen_svm_nodearray(list(TestDS['input'][j]))
    prediction = libsvm.svm_predict(model, x0)
    if int(prediction) == int(TestDS['target'][j]):
        correct += 1
print("Accuracy on test set is {0}%".format(correct * 100.0 / len(TestDS)))
###possible parameters###
# options:
import svm

# Earlier toy datasets; each pair is overwritten by the next, so only the
# last ``labels``/``samples`` pair is actually used for training.
labels = [0, 1]
samples = [[0, 0], [0, 1]]

labels = [0, 1, 1, 2]
samples = [[0, 0], [0, 1], [1, 0], [1, 1]]

labels = [0, 0, 1, 1]
samples = [[1, 1], [1, -1], [-1, 1], [-1, -1]]

param = svm.svm_parameter("-c 1")
problem = svm.svm_problem(labels, samples)

model = svm.libsvm.svm_train(problem, param)
pmodel = svm.toPyModel(model)
pmodel.predict_values(samples[0])
# Print the prediction for every training sample. The single-argument
# print(...) call behaves identically on Python 2 and 3.
for sample in samples:
    print(svm.libsvm.svm_predict(model, svm.gen_svm_nodearray(sample)[0]))

# Output buffer for the decision values; c_double is reached through the
# svm module because this script only imports ``svm``.
r = (svm.c_double * 6)()
svm.libsvm.svm_predict_values(model, svm.gen_svm_nodearray(samples[0])[0], r)
def _classify(self, d): """ Internal method that constructs the label-to-confidence map (dict) for a given DescriptorElement. The passed descriptor element is guaranteed to have a vector to extract. It is not extracted yet due to the philosophy of waiting until the vector is immediately needed. This moment is thus determined by the implementing algorithm. :param d: DescriptorElement containing the vector to classify. :type d: smqtk.representation.DescriptorElement :raises RuntimeError: Could not perform classification for some reason (see message in raised exception). :return: Dictionary mapping trained labels to classification confidence values :rtype: dict[collections.Hashable, float] """ if not self.has_model(): raise RuntimeError("No SVM model present for classification") # Get and normalize vector v = d.vector().astype(float) v = self._norm_vector(v) v, idx = svm.gen_svm_nodearray(v.tolist()) # Effectively reproducing the body of svmutil.svm_predict in order to # simplify and get around excessive prints svm_type = self.svm_model.get_svm_type() nr_class = self.svm_model.get_nr_class() c = dict((l, 0.) for l in self.get_labels()) if self.svm_model.is_probability_model(): # noinspection PyUnresolvedReferences if svm_type in [svm.NU_SVR, svm.EPSILON_SVR]: nr_class = 0 # noinspection PyCallingNonCallable,PyTypeChecker prob_estimates = (ctypes.c_double * nr_class)() svm.libsvm.svm_predict_probability(self.svm_model, v, prob_estimates) # Update dict for l, p in zip(self.svm_model.get_labels(), prob_estimates[:nr_class]): c[self.svm_label_map[l]] = p else: # noinspection PyUnresolvedReferences if svm_type in (svm.ONE_CLASS, svm.EPSILON_SVR, svm.NU_SVC): nr_classifier = 1 else: nr_classifier = nr_class * (nr_class - 1) // 2 # noinspection PyCallingNonCallable,PyTypeChecker dec_values = (ctypes.c_double * nr_classifier)() label = svm.libsvm.svm_predict_values(self.svm_model, v, dec_values) # Update dict c[self.svm_label_map[label]] = 1. 
assert len(c) == len(self.svm_label_map) return c
def calc_distance(self, x):
    """
    Return the absolute value of the first distance reported by
    ``svmutil.svm_distance_from_plane`` for ``x`` against ``self.svm_model``.

    :param x: Feature vector in a form accepted by
        ``svm.gen_svm_nodearray``.
    :return: Non-negative distance value.
    """
    # gen_svm_nodearray returns a (node array, max index) pair; only the
    # node array is needed here.
    nodes = svm.gen_svm_nodearray(x)[0]
    plane_distances = svmutil.svm_distance_from_plane(nodes, self.svm_model)
    return abs(plane_distances[0])
import svm

# Earlier toy datasets; each pair is overwritten by the next, so only the
# last ``labels``/``samples`` pair is actually used for training.
labels = [0, 1]
samples = [[0, 0], [0, 1]]

labels = [0, 1, 1, 2]
samples = [[0, 0], [0, 1], [1, 0], [1, 1]]

labels = [0, 0, 1, 1]
samples = [[1, 1], [1, -1], [-1, 1], [-1, -1]]

param = svm.svm_parameter('-c 1')
problem = svm.svm_problem(labels, samples)

model = svm.libsvm.svm_train(problem, param)
pmodel = svm.toPyModel(model)
pmodel.predict_values(samples[0])
# Print the prediction for every training sample. The single-argument
# print(...) call behaves identically on Python 2 and 3.
for sample in samples:
    print(svm.libsvm.svm_predict(model, svm.gen_svm_nodearray(sample)[0]))

# Output buffer for the decision values; c_double is reached through the
# svm module because this script only imports ``svm``.
r = (svm.c_double * 6)()
svm.libsvm.svm_predict_values(model, svm.gen_svm_nodearray(samples[0])[0], r)