Example no. 1
    def _classify_arrays(self, array_iter):
        if not self.has_model():
            raise RuntimeError("No SVM model present for classification")

        # Dump descriptors into a matrix for normalization and use in
        # prediction.
        vec_mat = numpy.array(list(array_iter))
        vec_mat = self._norm_vector(vec_mat)

        all_label_list = self.get_labels()
        svm_label_map = self.svm_label_map
        c_base = dict((l, 0.) for l in all_label_list)

        # Effectively reproducing the body of svmutil.svm_predict in order to
        # simplify and get around excessive prints
        svm_type = self.svm_model.get_svm_type()
        nr_class = self.svm_model.get_nr_class()
        # Model internal labels. Parallel to ``prob_estimates`` array.
        svm_model_labels = self.svm_model.get_labels()

        # TODO: Normalize input arrays in batch(es). TEST if current norm
        #       function can just take a matrix?

        if self.svm_model.is_probability_model():
            if svm_type in [svm.NU_SVR, svm.EPSILON_SVR]:
                nr_class = 0
            prob_estimates = (ctypes.c_double * nr_class)()
            for v in vec_mat:
                # Convert the (already normalized) vector into a libsvm node array
                v, idx = svm.gen_svm_nodearray(v.tolist())
                svm.libsvm.svm_predict_probability(self.svm_model, v,
                                                   prob_estimates)

                c = dict(c_base)  # Shallow copy
                c.update({svm_label_map[l]: p for l, p
                          in zip(svm_model_labels, prob_estimates[:nr_class])})
                yield c
        else:
            # noinspection PyUnresolvedReferences
            if svm_type in (svm.ONE_CLASS, svm.EPSILON_SVR, svm.NU_SVC):
                nr_classifier = 1
            else:
                nr_classifier = nr_class * (nr_class - 1) // 2
            # noinspection PyCallingNonCallable,PyTypeChecker
            dec_values = (ctypes.c_double * nr_classifier)()
            for v in vec_mat:
                # Convert the (already normalized) vector into a libsvm node array
                v, idx = svm.gen_svm_nodearray(v.tolist())
                label = svm.libsvm.svm_predict_values(self.svm_model, v,
                                                      dec_values)
                c = dict(c_base)  # Shallow copy
                c[svm_label_map[label]] = 1.
                yield c
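
The method above reproduces the body of svmutil.svm_predict by hand: it allocates a ctypes double array of length nr_class and calls svm.libsvm.svm_predict_probability directly, which avoids svmutil's console output. A minimal, self-contained sketch of the same low-level flow on toy data (the labels, samples and parameter string below are illustrative, not taken from the original code):

import ctypes
import svm

# Two linearly separable classes; enough points for the internal
# cross-validation libsvm runs when fitting the probability model.
labels = [0] * 10 + [1] * 10
samples = [[i, 1.0] for i in range(10)] + [[i, -1.0] for i in range(10)]

param = svm.svm_parameter('-t 0 -c 1 -b 1')   # -b 1: train a probability model
problem = svm.svm_problem(labels, samples)
model = svm.toPyModel(svm.libsvm.svm_train(problem, param))

nr_class = model.get_nr_class()
prob_estimates = (ctypes.c_double * nr_class)()
x0, _ = svm.gen_svm_nodearray([3.0, 1.0])
predicted = svm.libsvm.svm_predict_probability(model, x0, prob_estimates)

# ``predicted`` is the winning label; model internal labels are parallel to
# ``prob_estimates``, exactly as noted in the example above.
scores = {l: p for l, p in zip(model.get_labels(), prob_estimates[:nr_class])}
print(scores)
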
Example no. 2
def single_label(v):
    # Closure fragment: relies on ``nr_classifier``, ``self.svm_model``,
    # ``c_base`` and ``svm_label_map`` from the enclosing scope (compare the
    # decision-value branch of Example no. 1).
    dec_values = (ctypes.c_double * nr_classifier)()
    v, idx = svm.gen_svm_nodearray(v.tolist())
    label = svm.libsvm.svm_predict_values(self.svm_model, v,
                                          dec_values)
    c = dict(c_base)  # Shallow copy
    c[svm_label_map[label]] = 1.
    return c
Example no. 3
    def _classify(self, d):
        """
        Internal method that defines the generation of the classification map
        for a given DescriptorElement. This returns a dictionary mapping
        integer labels to a floating point value.

        :param d: DescriptorElement containing the vector to classify.
        :type d: smqtk.representation.DescriptorElement

        :raises RuntimeError: Could not perform classification for some reason
            (see message).

        :return: Dictionary mapping trained labels to classification confidence
            values
        :rtype: dict[collections.Hashable, float]

        """
        if not self.has_model():
            raise RuntimeError("No SVM model present for classification")

        # Get and normalize vector
        v = d.vector().astype(float)
        v = self._norm_vector(v)
        v, idx = svm.gen_svm_nodearray(v.tolist())

        # Effectively reproducing the body of svmutil.svm_predict in order to
        # simplify and get around excessive prints
        svm_type = self.svm_model.get_svm_type()
        nr_class = self.svm_model.get_nr_class()
        c = dict((l, 0.) for l in self.get_labels())

        if self.svm_model.is_probability_model():
            # noinspection PyUnresolvedReferences
            if svm_type in [svm.NU_SVR, svm.EPSILON_SVR]:
                nr_class = 0
            # noinspection PyCallingNonCallable
            prob_estimates = (ctypes.c_double * nr_class)()
            svm.libsvm.svm_predict_probability(self.svm_model, v,
                                               prob_estimates)
            # Update dict
            for l, p in zip(self.svm_model.get_labels(),
                            prob_estimates[:nr_class]):
                c[self.svm_label_map[l]] = p
        else:
            # noinspection PyUnresolvedReferences
            if svm_type in (svm.ONE_CLASS, svm.EPSILON_SVR, svm.NU_SVC):
                nr_classifier = 1
            else:
                nr_classifier = nr_class * (nr_class - 1) // 2
            # noinspection PyCallingNonCallable
            dec_values = (ctypes.c_double * nr_classifier)()
            label = svm.libsvm.svm_predict_values(self.svm_model, v,
                                                  dec_values)
            # Update dict
            c[self.svm_label_map[label]] = 1.

        assert len(c) == len(self.svm_label_map)
        return c
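
A detail worth calling out in the method above: libsvm itself only works with the numeric labels it was trained on, so the classifier keeps a separate svm_label_map to translate those integers back to the application's hashable labels. A tiny sketch of that translation step, with made-up values standing in for what the trained model would provide:

# Illustrative values only; in the code above they come from the trained model.
svm_label_map = {0: 'negative', 1: 'positive'}   # libsvm int label -> app label
model_labels = [1, 0]                            # as returned by model.get_labels()
prob_estimates = [0.8, 0.2]                      # parallel to model_labels

c = {label: 0.0 for label in svm_label_map.values()}
c.update({svm_label_map[l]: p for l, p in zip(model_labels, prob_estimates)})
print(c)   # {'negative': 0.2, 'positive': 0.8}
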
Example no. 5
def single_pred(v):
    # Closure fragment: relies on ``nr_class``, ``self.svm_model``, ``c_base``,
    # ``svm_label_map`` and ``svm_model_labels`` from the enclosing scope
    # (compare the probability branch of Example no. 1).
    prob_estimates = (ctypes.c_double * nr_class)()
    v, idx = svm.gen_svm_nodearray(v.tolist())
    svm.libsvm.svm_predict_probability(self.svm_model, v,
                                       prob_estimates)
    c = dict(c_base)  # Shallow copy
    c.update({svm_label_map[label]: prob for label, prob
              in zip(svm_model_labels, prob_estimates[:nr_class])})
    return c
Example no. 6
def predict_ion(chem_env, scatter_env, elements=None, svm_name=None):
    """
  Uses the trained classifier to predict the ions that most likely fit a given
  list of features about the site.

  Parameters
  ----------
  chem_env : mmtbx.ions.environment.ChemicalEnvironment
      A object containing information about the chemical environment at a site.
  scatter_env : mmtbx.ions.environment.ScatteringEnvironment, optional
      An object containing information about the scattering environment at a
      site.
  elements : list of str, optional
      A list of elements to include within the prediction. Must be a subset of
      mmtbx.ions.svm.ALLOWED_IONS. Note: Water is not added to elements by
      default.
  svm_name : str, optional
      The SVM to use for prediction. By default, the SVM trained on heavy atoms
      and calcium in the presence of anomalous data is used

  Returns
  -------
  list of tuple of str, float or None
      A sorted list of classes and the predicted probabilities associated with
      each or None if the trained classifier cannot be loaded.
  """

    # Load the classifier and the parameters used to interact with it
    classifier, vector_options, scaling, features = _get_classifier(svm_name)

    if classifier is None or vector_options is None:
        return None

    # Convert our data into a format that libsvm will accept
    vector = ion_vector(chem_env, scatter_env, **vector_options)
    vector = utils.scale_to([vector], scaling[0], scaling[1])[0]

    assert len(vector) == len(features)

    vector = vector[features]

    xi = svm.gen_svm_nodearray(
        list(vector),
        isKernel=classifier.param.kernel_type == svm.PRECOMPUTED,
    )[0]

    nr_class = classifier.get_nr_class()
    # prob_estimates isn't actually read by svm_predict_probability, it is only
    # written to with the final estimates. We just need to allocate space for it.
    prob_estimates = (c_double * nr_class)()
    svm.libsvm.svm_predict_probability(classifier, xi, prob_estimates)
    probs = prob_estimates[:nr_class]
    labels = [ALLOWED_IONS[i] for i in classifier.get_labels()]

    lst = sorted(zip(labels, probs), key=lambda x: -x[-1])

    if elements is not None:
        for element in elements:
            if element not in ALLOWED_IONS:
                raise Sorry("Unsupported element '{}'".format(element))

        # Filter out elements the caller does not care about
        classes, probs = [], []
        for element, prob in lst:
            if element in elements:
                classes.append(element)
                probs.append(prob)

        # Re-normalize the probabilities
        total = sum(probs)
        probs = [i / total for i in probs]
        lst = list(zip(classes, probs))

    return lst
Example no. 7
state = 'single'
probs = (svm.c_double*2)(0,0)
pA = 1.
pB = 1.
maxamp = 0.
sigs_now = []
#H = np.zeros(256)

print('-')
counter = 0
tam = A.shape[0]
for i in xrange(tam):
    if i % 10000 == 0:
        sys.stdout.write('\rA:\t%d\t%d\t%f\t%d\t%f'%(i, tam, i / tam, counter, counter/(i+1)))
        sys.stdout.flush()
    x0, max_idx = svm.gen_svm_nodearray(A[i].tolist())
    c = svm.libsvm.svm_predict_probability(model, x0, probs)
    off, ch, sig_now = sig.next()
    M = abs(sig_now).max()
    pA *= probs[0] ** M
    pB *= probs[1] ** M
    maxamp += M
    sigs_now.append(sig_now)
    #H += np.abs(sg.hilbert(sig_now))
    if (i+1) % 11 == 0:
        pA = pA ** (1. / maxamp)
        pB = pB ** (1. / maxamp)

        fig = plt.figure(1,figsize=(16,16))
        timer = fig.canvas.new_timer(interval=2000)
        def close_event():
Example no. 8
probs = (svm.c_double * 2)(0, 0)
pA = 1.
pB = 1.
maxamp = 0.
sigs_now = []
#H = np.zeros(256)

print('-')
counter = 0
tam = A.shape[0]
for i in xrange(tam):
    if i % 10000 == 0:
        sys.stdout.write('\rA:\t%d\t%d\t%f\t%d\t%f' %
                         (i, tam, i / tam, counter, counter / (i + 1)))
        sys.stdout.flush()
    x0, max_idx = svm.gen_svm_nodearray(A[i].tolist())
    c = svm.libsvm.svm_predict_probability(model, x0, probs)
    off, ch, sig_now = sig.next()
    M = abs(sig_now).max()
    pA *= probs[0]**M
    pB *= probs[1]**M
    maxamp += M
    sigs_now.append(sig_now)
    #H += np.abs(sg.hilbert(sig_now))
    if (i + 1) % 11 == 0:
        pA = pA**(1. / maxamp)
        pB = pB**(1. / maxamp)

        fig = plt.figure(1, figsize=(16, 16))
        timer = fig.canvas.new_timer(interval=2000)
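
The loops above fold per-window probabilities into pA and pB with pA *= probs[0] ** M and later take pA ** (1. / maxamp), i.e. an amplitude-weighted geometric mean of the window probabilities. A small sketch of the same quantity computed in log space, which avoids the underflow the running product can hit on long signals (function name and sample values are illustrative):

import numpy as np

def weighted_geometric_mean(probs, weights):
    # exp(sum_i w_i*log(p_i) / sum_i w_i)  ==  (prod_i p_i**w_i) ** (1 / sum_i w_i)
    probs = np.asarray(probs, dtype=float)
    weights = np.asarray(weights, dtype=float)
    return float(np.exp(np.sum(weights * np.log(probs)) / np.sum(weights)))

# e.g. per-window class probabilities weighted by each window's peak amplitude
print(weighted_geometric_mean([0.9, 0.7, 0.95], [1.0, 0.5, 2.0]))
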
Example no. 9
def predict_ion(chem_env, scatter_env, elements=None, svm_name=None):
  """
  Uses the trained classifier to predict the ions that most likely fit a given
  list of features about the site.

  Parameters
  ----------
  chem_env : mmtbx.ions.environment.ChemicalEnvironment
      An object containing information about the chemical environment at a site.
  scatter_env : mmtbx.ions.environment.ScatteringEnvironment, optional
      An object containing information about the scattering environment at a
      site.
  elements : list of str, optional
      A list of elements to include within the prediction. Must be a subset of
      mmtbx.ions.svm.ALLOWED_IONS. Note: Water is not added to elements by
      default.
  svm_name : str, optional
      The SVM to use for prediction. By default, the SVM trained on heavy atoms
      and calcium in the presence of anomalous data is used.

  Returns
  -------
  list of tuple of str, float or None
      A sorted list of classes and the predicted probabilities associated with
      each, or None if the trained classifier cannot be loaded.
  """

  # Load the classifier and the parameters used to interact with it
  classifier, vector_options, scaling, features = _get_classifier(svm_name)

  if classifier is None or vector_options is None:
    return None

  # Convert our data into a format that libsvm will accept
  vector = ion_vector(chem_env, scatter_env, **vector_options)
  vector = utils.scale_to([vector], scaling[0], scaling[1])[0]

  assert len(vector) == len(features)

  vector = vector[features]

  xi = svm.gen_svm_nodearray(
    list(vector), isKernel=classifier.param.kernel_type == svm.PRECOMPUTED,
    )[0]

  nr_class = classifier.get_nr_class()
  # prob_estimates isn't actually read by svm_predict_probability, it is only
  # written to with the final estimates. We just need to allocate space for it.
  prob_estimates = (c_double * nr_class)()
  svm.libsvm.svm_predict_probability(classifier, xi, prob_estimates)
  probs = prob_estimates[:nr_class]
  labels = [ALLOWED_IONS[i] for i in classifier.get_labels()]

  lst = sorted(zip(labels, probs), key=lambda x: -x[-1])

  if elements is not None:
    for element in elements:
      if element not in ALLOWED_IONS:
        raise Sorry("Unsupported element '{}'".format(element))

    # Filter out elements the caller does not care about
    classes, probs = [], []
    for element, prob in lst:
      if element in elements:
        classes.append(element)
        probs.append(prob)

    # Re-normalize the probabilities
    total = sum(probs)
    probs = [i / total for i in probs]
    lst = list(zip(classes, probs))

  return lst
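
The elements branch above filters the predictions down to the requested classes and re-normalizes the surviving probabilities so they sum to 1 again. A standalone sketch of just that step (the helper name and the ion labels are illustrative):

def filter_and_renormalize(predictions, keep):
    # predictions: sorted list of (label, probability); keep: labels to retain.
    keep = set(keep)
    kept = [(label, p) for label, p in predictions if label in keep]
    total = sum(p for _, p in kept)
    return [(label, p / total) for label, p in kept]

print(filter_and_renormalize([('ZN', 0.6), ('HOH', 0.3), ('CA', 0.1)], {'ZN', 'CA'}))
# [('ZN', 0.857...), ('CA', 0.142...)]
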
Example no. 10
prob = svm_problem([int(t) for t in TrainDS['target']],[list(i) for i in TrainDS['input']])
param = svm_parameter()
# option: -t 0: linear kernel. Best for classification.
# option: -c 0.01: regularization parameter. smaller is more regularization
# see below for all options
param.parse_options('-t 0 -c 0.01') 
print "Training svm..."
model = libsvm.svm_train(prob,param)

print "Testing svm with three random inputs"
from random import randrange
for j in range(3):
    i = randrange(0,len(TestDS))
    #again some conversion needed because of low level interface
    x0,m_idx = gen_svm_nodearray(list(TestDS['input'][i]))
    prediction = libsvm.svm_predict(model, x0)
    print("Target:{0}, prediction:{1}".format(TestDS['target'][i],prediction))
    
#test svm over test dataset
correct = 0
for j in range(len(TestDS)):
    #again some conversion needed because of low level interface
    x0,m_idx = gen_svm_nodearray(list(TestDS['input'][j]))
    prediction = libsvm.svm_predict(model, x0)
    if int(prediction) == int(TestDS['target'][j]):
        correct +=1
print "Accuracy on test set is {0}%".format(correct*100.0/len(TestDS))


###possible parameters###
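
Example no. 10 drives training and testing entirely through the low-level gen_svm_nodearray / libsvm.svm_predict interface. For comparison, the same flow is usually shorter through svmutil's high-level wrappers; a hedged sketch on toy data standing in for the TrainDS / TestDS structures, which are not defined in the snippet above:

from svmutil import svm_train, svm_predict

# Toy stand-ins for TrainDS / TestDS.
y_train = [0, 0, 1, 1]
x_train = [[1, 1], [1, 2], [-1, -1], [-1, -2]]
y_test = [0, 1]
x_test = [[2, 1], [-2, -1]]

model = svm_train(y_train, x_train, '-t 0 -c 0.01 -q')   # -q: quiet training
p_labels, p_acc, p_vals = svm_predict(y_test, x_test, model)
print("Accuracy on test set is {0}%".format(p_acc[0]))
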
Example no. 11
prob = svm_problem([int(t) for t in TrainDS['target']],
                   [list(i) for i in TrainDS['input']])
param = svm_parameter()
# option: -t 0: linear kernel. Best for classification.
# option: -c 0.01: regularization parameter. smaller is more regularization
# see below for all options
param.parse_options('-t 0 -c 0.01')
print "Training svm..."
model = libsvm.svm_train(prob, param)

print "Testing svm with three random inputs"
from random import randrange
for j in range(3):
    i = randrange(0, len(TestDS))
    #again some conversion needed because of low level interface
    x0, m_idx = gen_svm_nodearray(list(TestDS['input'][i]))
    prediction = libsvm.svm_predict(model, x0)
    print("Target:{0}, prediction:{1}".format(TestDS['target'][i], prediction))

#test svm over test dataset
correct = 0
for j in range(len(TestDS)):
    #again some conversion needed because of low level interface
    x0, m_idx = gen_svm_nodearray(list(TestDS['input'][j]))
    prediction = libsvm.svm_predict(model, x0)
    if int(prediction) == int(TestDS['target'][j]):
        correct += 1
print "Accuracy on test set is {0}%".format(correct * 100.0 / len(TestDS))

###possible parameters###
# options:
Example no. 12
import svm


labels = [0, 1]
samples = [[0, 0], [0, 1]]

labels = [0, 1, 1, 2]
samples = [[0, 0], [0, 1], [1, 0], [1, 1]]

import svm

labels = [0, 0, 1, 1]
samples = [[1, 1], [1, -1], [-1, 1], [-1, -1]]

param = svm.svm_parameter("-c 1")
problem = svm.svm_problem(labels, samples)

model = svm.libsvm.svm_train(problem, param)
pmodel = svm.toPyModel(model)
pmodel.predict_values(samples[0])
for i in range(len(samples)):
    print svm.libsvm.svm_predict(model, svm.gen_svm_nodearray(samples[i])[0])


from ctypes import c_double

r = (c_double * 6)()
svm.libsvm.svm_predict_values(model, svm.gen_svm_nodearray(samples[0])[0], r)
Example no. 13
    def _classify(self, d):
        """
        Internal method that constructs the label-to-confidence map (dict) for
        a given DescriptorElement.

        The passed descriptor element is guaranteed to have a vector to
        extract. It is not extracted yet due to the philosophy of waiting
        until the vector is immediately needed. This moment is thus determined
        by the implementing algorithm.

        :param d: DescriptorElement containing the vector to classify.
        :type d: smqtk.representation.DescriptorElement

        :raises RuntimeError: Could not perform classification for some reason
            (see message in raised exception).

        :return: Dictionary mapping trained labels to classification confidence
            values
        :rtype: dict[collections.Hashable, float]

        """
        if not self.has_model():
            raise RuntimeError("No SVM model present for classification")

        # Get and normalize vector
        v = d.vector().astype(float)
        v = self._norm_vector(v)
        v, idx = svm.gen_svm_nodearray(v.tolist())

        # Effectively reproducing the body of svmutil.svm_predict in order to
        # simplify and get around excessive prints
        svm_type = self.svm_model.get_svm_type()
        nr_class = self.svm_model.get_nr_class()
        c = dict((l, 0.) for l in self.get_labels())

        if self.svm_model.is_probability_model():
            # noinspection PyUnresolvedReferences
            if svm_type in [svm.NU_SVR, svm.EPSILON_SVR]:
                nr_class = 0
            # noinspection PyCallingNonCallable,PyTypeChecker
            prob_estimates = (ctypes.c_double * nr_class)()
            svm.libsvm.svm_predict_probability(self.svm_model, v,
                                               prob_estimates)
            # Update dict
            for l, p in zip(self.svm_model.get_labels(),
                            prob_estimates[:nr_class]):
                c[self.svm_label_map[l]] = p
        else:
            # noinspection PyUnresolvedReferences
            if svm_type in (svm.ONE_CLASS, svm.EPSILON_SVR, svm.NU_SVC):
                nr_classifier = 1
            else:
                nr_classifier = nr_class * (nr_class - 1) // 2
            # noinspection PyCallingNonCallable,PyTypeChecker
            dec_values = (ctypes.c_double * nr_classifier)()
            label = svm.libsvm.svm_predict_values(self.svm_model, v,
                                                  dec_values)
            # Update dict
            c[self.svm_label_map[label]] = 1.

        assert len(c) == len(self.svm_label_map)
        return c
Example no. 15
    def calc_distance(self, x):
        # Method fragment: ``svm_distance_from_plane`` appears to come from a
        # customized svmutil rather than stock libsvm.
        svm_node_array, _ = svm.gen_svm_nodearray(x)
        distance = svmutil.svm_distance_from_plane(svm_node_array,
                                                   self.svm_model)
        return abs(distance[0])
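
With stock libsvm and a linear kernel, a comparable distance from the separating plane can be sketched by reconstructing w from the support vectors and dividing the raw decision value by its norm. This is an assumption-laden sketch (toy data, binary linear case only), not the implementation behind svm_distance_from_plane above:

import ctypes
import numpy as np
import svm

labels = [0, 0, 1, 1]
samples = [[1, 1], [1, -1], [-1, 1], [-1, -1]]
model = svm.toPyModel(svm.libsvm.svm_train(svm.svm_problem(labels, samples),
                                           svm.svm_parameter('-t 0 -c 1')))

# w = sum_i coef_i * SV_i holds only for the linear kernel (two-class case).
sv_coef = [c[0] for c in model.get_sv_coef()]
svs = model.get_SV()                      # list of {feature_index: value} dicts
dim = max(k for sv in svs for k in sv if k > 0)
w = np.zeros(dim)
for coef, sv in zip(sv_coef, svs):
    for k, val in sv.items():
        if k > 0:
            w[k - 1] += coef * val

dec = (ctypes.c_double * 1)()             # one decision value for two classes
x0, _ = svm.gen_svm_nodearray([1, 1])
svm.libsvm.svm_predict_values(model, x0, dec)
print(abs(dec[0]) / np.linalg.norm(w))    # geometric distance from the plane
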
Example no. 16
import svm

labels = [0, 1]
samples = [[0, 0], [0, 1]]

labels = [0, 1, 1, 2]
samples = [[0, 0], [0, 1], [1, 0], [1, 1]]

import svm

labels = [0, 0, 1, 1]
samples = [[1, 1], [1, -1], [-1, 1], [-1, -1]]

param = svm.svm_parameter('-c 1')
problem = svm.svm_problem(labels, samples)

model = svm.libsvm.svm_train(problem, param)
pmodel = svm.toPyModel(model)
pmodel.predict_values(samples[0])
for i in range(len(samples)):
    print svm.libsvm.svm_predict(model, svm.gen_svm_nodearray(samples[i])[0])

from ctypes import c_double

r = (c_double * 6)()
svm.libsvm.svm_predict_values(model, svm.gen_svm_nodearray(samples[0])[0], r)
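
The (c_double * 6)() allocation above is larger than this two-class model needs: svm_predict_values fills nr_class * (nr_class - 1) // 2 pairwise decision values, one per one-vs-one classifier, so a two-class model only uses the first slot. A small sketch with a three-class toy problem, where the sizing actually matters (data is illustrative):

import ctypes
import svm

labels = [0, 0, 1, 1, 2, 2]
samples = [[1, 1], [1, 2], [5, 5], [5, 6], [9, 1], [9, 2]]
model = svm.toPyModel(svm.libsvm.svm_train(svm.svm_problem(labels, samples),
                                           svm.svm_parameter('-t 0 -c 1')))

nr_class = model.get_nr_class()                # 3
n_pairs = nr_class * (nr_class - 1) // 2       # 3 one-vs-one classifiers
dec_values = (ctypes.c_double * n_pairs)()

x0, _ = svm.gen_svm_nodearray([1, 1])
label = svm.libsvm.svm_predict_values(model, x0, dec_values)

# libsvm orders the pairs as (l0 vs l1), (l0 vs l2), (l1 vs l2), ... following
# the label order reported by model.get_labels(); ``label`` is the prediction.
print(list(dec_values))
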