コード例 #1
0
ファイル: test_extras.py プロジェクト: SylvainCorlay/numpy
 def test_unique_allmasked(self):
     # Test all masked
     data = masked_array([1, 1, 1], mask=True)
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, ], mask=[True]))
     assert_equal(test[1], [0])
     assert_equal(test[2], [0, 0, 0])
     #
     # Test masked
     data = masked
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array(masked))
     assert_equal(test[1], [0])
     assert_equal(test[2], [0])
コード例 #2
0
ファイル: test_extras.py プロジェクト: tws0002/hman
 def test_unique_allmasked(self):
     # Test all masked
     data = masked_array([1, 1, 1], mask=True)
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, ], mask=[True]))
     assert_equal(test[1], [0])
     assert_equal(test[2], [0, 0, 0])
     #
     # Test masked
     data = masked
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array(masked))
     assert_equal(test[1], [0])
     assert_equal(test[2], [0])
コード例 #3
0
ファイル: test_extras.py プロジェクト: niccalle/numpy
 def test_unique_onmaskedarray(self):
     # Test unique on masked data w/use_mask=True
     data = masked_array([1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0])
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
     assert_equal(test[1], [0, 3, 5, 2])
     assert_equal(test[2], [0, 0, 3, 1, 3, 2])
     #
     data.fill_value = 3
     data = masked_array(data=[1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0], fill_value=3)
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
     assert_equal(test[1], [0, 3, 5, 2])
     assert_equal(test[2], [0, 0, 3, 1, 3, 2])
コード例 #4
0
ファイル: test_extras.py プロジェクト: tws0002/hman
 def test_unique_onmaskedarray(self):
     # Test unique on masked data w/use_mask=True
     data = masked_array([1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0])
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
     assert_equal(test[1], [0, 3, 5, 2])
     assert_equal(test[2], [0, 0, 3, 1, 3, 2])
     #
     data.fill_value = 3
     data = masked_array(data=[1, 1, 1, 2, 2, 3],
                         mask=[0, 0, 1, 0, 1, 0], fill_value=3)
     test = unique(data, return_index=True, return_inverse=True)
     assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1]))
     assert_equal(test[1], [0, 3, 5, 2])
     assert_equal(test[2], [0, 0, 3, 1, 3, 2])
コード例 #5
0
 def adapt(self, mcmc_chain, step_output):
     """Fit the proposal once, at the pre-specified switch iteration.

     Nothing happens unless ``mcmc_chain.iteration`` equals
     ``self.num_samples_when_to_switch``. At that iteration, indices are
     drawn with replacement from ``[num_sample_discard, iteration)``,
     de-duplicated, and the selected chain samples are passed to
     ``self.fit_gmm``; the result is stored in ``self.proposal``.

     ``step_output`` is not used here.
     """
     current = mcmc_chain.iteration
     if current != self.num_samples_when_to_switch:
         return

     discard = self.num_sample_discard
     # randint draws from [0, current - discard); shifting by ``discard``
     # moves the indices past the discarded prefix of the chain.
     drawn = randint(current - discard, size=self.num_samples_gmm) + discard
     self.proposal = self.fit_gmm(mcmc_chain.samples[unique(drawn)])
コード例 #6
0
ファイル: main.py プロジェクト: Primer42/TuftComp136
def calcN(classKernels, trainLabels):
    """Build the regularised matrix ``N`` from the per-class kernel blocks.

    For each distinct label (in the order given by ``unique(trainLabels)``)
    the term ``K_j (I - 1/l_j) K_j^T`` is accumulated, where ``K_j`` is
    ``classKernels[j]`` and ``l_j`` is the number of training examples
    carrying that label. NaNs are then replaced via ``nan_to_num`` and a
    multiple of the identity is added for numerical stability.

    :param classKernels: sequence of matrices, one per distinct label and
        indexed in the order of ``unique(trainLabels)``; entry ``j`` must
        have ``l_j`` columns and ``len(trainLabels)`` rows for the products
        below to be defined.
    :param trainLabels: labels of the training examples; compared
        elementwise with ``==`` (presumably a NumPy array -- confirm
        against the caller).
    :return: square matrix with ``len(trainLabels)`` rows and columns.
    """
    numExamples = len(trainLabels)
    N = zeros((numExamples, numExamples))
    for i, l in enumerate(unique(trainLabels)):
        numExamplesWithLabel = len(where(trainLabels == l)[0])
        # Identity with 1/l_j subtracted from every entry (the 1-D ones()
        # vector broadcasts across the rows of the identity).
        Idiff = (identity(numExamplesWithLabel, Float64)
                 - (1.0 / numExamplesWithLabel) * ones(numExamplesWithLabel, Float64))
        firstDot = dot(classKernels[i], Idiff)
        N += dot(firstDot, transpose(classKernels[i]))
    N = nan_to_num(N)

    # Make N more numerically stable. The scale of this ridge term is a
    # fixed heuristic; the original author notes it could be tuned instead.
    additionToN = ((mean(diag(N)) + 1) / 100.0) * identity(N.shape[0], Float64)
    N += additionToN

    # Make sure N is invertible: keep adding the ridge term while inversion
    # fails (see "numerical issues and regularization" in the paper the
    # original comment refers to). Give up silently after 1000 attempts.
    for _ in range(1000):
        try:
            inv(N)
        except LinAlgError:
            N += additionToN
        else:
            # Inversion succeeded, so N is final. Without this break the
            # loop kept re-inverting the same matrix for all 1000 rounds.
            break

    return N
コード例 #7
0
    def adapt(self, mcmc_chain, step_output):
        """
        Refresh ``self.Z``, the window of chain samples currently in use.

        Nothing changes until the iteration exceeds ``self.sample_discard``.
        While fewer than ``self.num_samples_Z`` post-discard samples exist,
        all of them are used. Afterwards, and only while the iteration is
        below ``self.stop_adapt``, a random subset is drawn instead.

        ``step_output`` is not used here.
        """
        current = mcmc_chain.iteration
        history = mcmc_chain.samples[0:(current + 1)]
        burn_in = self.sample_discard

        # still inside the discarded prefix: leave Z untouched
        if current <= burn_in:
            return

        # not enough samples yet: take everything after the discard
        if current < burn_in + self.num_samples_Z:
            self.Z = history[burn_in:(current + 1)]
            return

        # adaptation has been switched off: leave Z untouched
        if current >= self.stop_adapt:
            return

        # Draw indices with repetition and drop duplicates afterwards;
        # sampling without repetition is considered too expensive.
        picks = randint(current - burn_in, size=self.num_samples_Z) + burn_in
        self.Z = history[unique(picks)]
コード例 #8
0
 def test_unique_onlist(self):
     # Test unique on list
     data = [1, 1, 1, 2, 2, 3]
     test = unique(data, return_index=True, return_inverse=True)
     self.assertTrue(isinstance(test[0], MaskedArray))
     assert_equal(test[0], masked_array([1, 2, 3], mask=[0, 0, 0]))
     assert_equal(test[1], [0, 3, 5])
     assert_equal(test[2], [0, 0, 0, 1, 1, 2])
コード例 #9
0
ファイル: test_extras.py プロジェクト: SylvainCorlay/numpy
 def test_unique_onlist(self):
     # Test unique on list
     data = [1, 1, 1, 2, 2, 3]
     test = unique(data, return_index=True, return_inverse=True)
     self.assertTrue(isinstance(test[0], MaskedArray))
     assert_equal(test[0], masked_array([1, 2, 3], mask=[0, 0, 0]))
     assert_equal(test[1], [0, 3, 5])
     assert_equal(test[2], [0, 0, 0, 1, 1, 2])
コード例 #10
0
ファイル: GMMMetropolis.py プロジェクト: mgong2/kameleon-mcmc
 def adapt(self, mcmc_chain, step_output):
     """Learn the proposal exactly once, when the switch iteration is hit.

     On the iteration equal to ``self.num_samples_when_to_switch`` a random
     set of post-discard sample indices is drawn (with repetition, then
     de-duplicated) and the corresponding chain samples are handed to
     ``self.fit_gmm``. Its return value becomes ``self.proposal``. On every
     other iteration the method does nothing. ``step_output`` is unused.
     """
     switch_now = mcmc_chain.iteration == self.num_samples_when_to_switch
     if switch_now:
         offset = self.num_sample_discard
         # number of chain positions between the discard point and now
         span = mcmc_chain.iteration - offset
         picks = unique(randint(span, size=self.num_samples_gmm) + offset)
         history = mcmc_chain.samples
         self.proposal = self.fit_gmm(history[picks])
コード例 #11
0
ファイル: DataObject.py プロジェクト: sha0510/pyisc
    def __init__(self, X, format=None, class_column=None, classes='auto'):
        '''
        Create a DataObject, the container for data analysed by an AnomalyDetector.

        X may be an existing pyisc._DataObject, a Format instance or a numpy array.

        - If X is a pyisc._DataObject, its underlying ISC data object is wrapped.
        - If X is a Format instance, it is taken to describe content that is
          added to the object later using the add2Darray or add1Darray methods.
        - If X is a numpy array and no format argument is given, a Format
          instance is generated automatically. If class_column is specified,
          that column of the generated format becomes a symbol column whose
          elements are indexes into the classes_ list. If classes is 'auto',
          the classes_ list is created from the (sorted, unique) elements found
          in the class column of X; otherwise the given classes list is used.
          Values not present in classes_ are encoded as -1.
        - If X is a numpy array and format is a pyisc Format instance, the
          array is stored with that format unchanged.

        :param X: a pyisc._DataObject, a Format instance or a numpy array
        :param format: None or a pyisc Format instance
        :param class_column: None or an integer
        :param classes: 'auto' or a list of elements in X[class_column]
        :return:
        '''
        self.class_column = class_column
        # Case 1: wrap the ISC data object of an existing data object.
        if isinstance(X, pyisc._DataObject):
            pyisc._DataObject.__init__(self,X.get_isc_data_object())
            return
        # Case 2: X is itself the format; data is expected to be added later.
        elif isinstance(X, pyisc.Format):
            self._format = X
            pyisc._DataObject.__init__(self,X)
            return
        # Case 3: X is raw array data.
        elif isinstance(X, ndarray):
                if format is None:
                    # No format supplied: generate one column per row of X.T.
                    format = Format()
                    num_cols = len(X.T)
                    if class_column is not None:
                        assert class_column >= 0 and class_column < num_cols
                    for col in range(num_cols):
                        if col != class_column:
                            format.addColumn("Column %i"%col, Format.Continuous)
                        else:
                            # The class column is symbolic; its values are
                            # replaced below by indexes into self.classes_.
                            format.addColumn("Column %i"%col, Format.Symbol)
                            # Work on a transposed copy so the caller's array
                            # is not modified.
                            A =  X.T.copy()
                            if classes == 'auto':
                                self.classes_ =  list(sorted(unique(A[class_column])))
                            else:
                                self.classes_ = classes
                            class_col = format.get_nth_column(class_column)
                            # Register one label per class on the symbol column.
                            for c in self.classes_:
                                class_col.add("Class %i"%c if isinstance(c, int) else "Class %s"%c if isinstance(c, str) and len(c) == 1 else str(c))
                            # Encode each class value as its position in
                            # classes_, or -1 when it is not listed there.
                            A[class_column] = [self.classes_.index(v) if v in self.classes_ else -1 for v in A[class_column]]
                            X = A.T
                    self._format = format
                    if X.ndim == 1: # This fixes a problem of converting it to c++ data object
                        X = array([X.copy()]).T

                    pyisc._DataObject.__init__(self,format,X.astype(float))
                    return
                elif isinstance(format, pyisc.Format):
                    # Caller-supplied format: pass the array through as is.
                    self._format = format
                    pyisc._DataObject.__init__(self,format,X)
                    return
        # Fallback: any other X (or an ndarray with a format that is neither
        # None nor a pyisc.Format) is handed directly to the base initialiser.
        pyisc._DataObject.__init__(self,X)
コード例 #12
0
    def __init__(self, X, format=None, class_column=None, classes='auto'):
        '''
        Create a DataObject, the container for data analysed by an AnomalyDetector.

        X may be a Format instance or a numpy array.

        - If X is a Format instance, it is taken to describe content that is
          added to the object later using the add2Darray or add1Darray methods.
        - If X is a numpy array and no format argument is given, a Format
          instance is generated automatically. If class_column is specified,
          that column of the generated format becomes a symbol column whose
          elements are indexes into the classes_ list. If classes is 'auto',
          the classes_ list is created from the (sorted, unique) elements found
          in the class column of X; otherwise the given classes list is used.
          Values not present in classes_ are encoded as -1.
        - If X is a numpy array and format is a pyisc Format instance, the
          array is stored with that format unchanged.

        :param X: a Format instance or a numpy array
        :param format: None or a pyisc Format instance
        :param class_column: None or an integer
        :param classes: 'auto' or a list of elements in X[class_column]
        :return:
        '''
        self.class_column = class_column
        # Case 1: X is itself the format; data is expected to be added later.
        if isinstance(X, pyisc.Format):
            self._format = X
            pyisc._DataObject.__init__(self,X)
            return
        # Case 2: X is raw array data.
        elif isinstance(X, ndarray):
            if format is None:
                # No format supplied: generate one column per row of X.T.
                format = Format()
                num_cols = len(X.T)
                if class_column is not None:
                    assert class_column >= 0 and class_column < num_cols
                for col in range(num_cols):
                    if col != class_column:
                        format.addColumn("Column %i"%col, Format.Continuous)
                    else:
                        # The class column is symbolic; its values are
                        # replaced below by indexes into self.classes_.
                        format.addColumn("Column %i"%col, Format.Symbol)
                        # Work on a transposed copy so the caller's array is
                        # not modified.
                        A =  X.T.copy()
                        if classes == 'auto':
                            self.classes_ =  list(sorted(unique(A[class_column])))
                        else:
                            self.classes_ = classes
                        class_col = format.get_nth_column(class_column)
                        # Register one label per class on the symbol column.
                        for c in self.classes_:
                            class_col.add("Class %i"%c if isinstance(c, int) else "Class %s"%c if isinstance(c, str) and len(c) == 1 else str(c))
                        # Encode each class value as its position in classes_,
                        # or -1 when it is not listed there.
                        A[class_column] = [self.classes_.index(v) if v in self.classes_ else -1 for v in A[class_column]]
                        X = A.T
                self._format = format
                if X.ndim == 1: # This fixes a problem of converting it to c++ data object
                    X = array([X.copy()]).T

                pyisc._DataObject.__init__(self,format,X.astype(float))
                return
            elif isinstance(format, pyisc.Format):
                # Caller-supplied format: pass the array through as is.
                self._format = format
                pyisc._DataObject.__init__(self,format,X)
                return
        # Fallback: any other X (or an ndarray with a format that is neither
        # None nor a pyisc.Format) is handed directly to the base initialiser.
        pyisc._DataObject.__init__(self,X)
コード例 #13
0
ファイル: main.py プロジェクト: Primer42/TuftComp136
def getClassKernels(fullKernelMatrix, trainLabels):
    """Split the full kernel matrix into one column block per class.

    For every distinct label (in ``unique(trainLabels)`` order) a matrix is
    built whose rows correspond to all examples and whose columns correspond
    to the examples carrying that label. With ``l`` examples in total and
    ``lj`` examples in class ``j``, block ``j`` is an ``l x lj`` matrix.

    :param fullKernelMatrix: indexable as ``fullKernelMatrix[row][col]``.
    :param trainLabels: labels, compared elementwise with ``==``.
    :return: list of the per-class blocks.
    """
    numRows = len(fullKernelMatrix)
    kernelsPerClass = []
    for label in unique(trainLabels):
        # positions of the training examples that carry this label
        members = where(trainLabels == label)[0]
        block = zeros((numRows, len(members)))
        for rowIdx in range(numRows):
            sourceRow = fullKernelMatrix[rowIdx]
            for colIdx, exampleIdx in enumerate(members):
                block[rowIdx][colIdx] = sourceRow[exampleIdx]
        kernelsPerClass.append(block)
    return kernelsPerClass
コード例 #14
0
    def adapt(self, mcmc_chain, step_output):
        """
        Update ``self.Z``, the sliding window of samples to use.

        The window is only touched once the iteration has passed
        ``self.sample_discard``: first it grows to hold every post-discard
        sample, and once ``self.num_samples_Z`` of them are available it is
        replaced by a random subset, until ``self.stop_adapt`` is reached.

        ``step_output`` is not used here.
        """
        step = mcmc_chain.iteration
        seen = mcmc_chain.samples[0 : (step + 1)]

        # only adapt after discard has passed
        if step > self.sample_discard:
            still_filling = step < self.sample_discard + self.num_samples_Z
            if still_filling:
                # use all samples after discard if not yet enough
                self.Z = seen[self.sample_discard : (step + 1)]
            elif step < self.stop_adapt:
                # Once enough samples exist, use a random subset drawn with
                # repetition and remove duplicates; sampling without
                # repetition is too expensive. Adaptation stops at stop_adapt.
                offset = self.sample_discard
                draws = randint(step - offset, size=self.num_samples_Z)
                self.Z = seen[unique(draws + offset)]
コード例 #15
0
ファイル: main.py プロジェクト: Primer42/TuftComp136
def calcM(classKernelList, trainLabels):
    """Return the outer product of the difference of the first two class M's.

    ``calcClassM`` is evaluated once per (class kernel, label) pair, pairing
    ``classKernelList`` with ``unique(trainLabels)`` in order. Only the
    results for the first two classes enter the returned matrix.

    :param classKernelList: per-class kernel matrices, in label order.
    :param trainLabels: labels of the training examples.
    :return: ``outer(d, d)`` where ``d`` is first-class M minus second-class M.
    """
    perClassM = [
        calcClassM(kernel, trainLabels, lbl)
        for kernel, lbl in zip(classKernelList, unique(trainLabels))
    ]
    delta = perClassM[0] - perClassM[1]
    return outer(delta, delta)