Example #1
    def __train__(self, data, labels):
        """Fit the kernel SVM multipliers for ``data`` / ``labels``.

        Stores the training set on the instance, builds the matrix
        ``H[i, j] = y_i * y_j * K(x_i, x_j)`` with ``self.__kernelFunc__``
        and hands it to ``QP`` together with a linear term of -1, the box
        bounds ``0 <= x <= self.C`` and the equality ``labels . x = 0``.
        The problem is solved with the ``'cvxopt_qp'`` solver while stdout
        is redirected to the null device.

        Parameters:
            data: training samples, indexable by sample number.
            labels: array of class labels, one per sample.

        Side effects:
            Sets ``self.__trainingData__``, ``self.__trainingLabels__``,
            ``self.__lambdas__`` (entries with magnitude below 1e-4 are
            zeroed) and ``self.numSupportVectors``. The bias term is not
            computed here.
        """
        l = labels.reshape((-1,1))
        self.__trainingData__ = data
        self.__trainingLabels__ = l
        N = len(l)
        H = zeros((N,N))
        for i in range(N):
            for j in range(N):
                H[i,j] = self.__trainingLabels__[i]*self.__trainingLabels__[j]*self.__kernelFunc__(self.__trainingData__[i],self.__trainingData__[j])
        f = -1.0*ones(labels.shape)
        lb = zeros(labels.shape)
        ub = self.C * ones(labels.shape)
        Aeq = labels
        beq = 0.0
        suppressOut = True
        devnull = None
        oldstdout_fno = None
        if suppressOut:
            # Point file descriptor 1 at the null device so solver chatter is
            # dropped; keep a duplicate of the real stdout to restore later.
            devnull = open(os.devnull, 'w')
            oldstdout_fno = os.dup(sys.stdout.fileno())
            os.dup2(devnull.fileno(), 1)
        try:
            p = QP(matrix(H),f.tolist(),lb=lb.tolist(),ub=ub.tolist(),Aeq=Aeq.tolist(),beq=beq)
            r = p.solve('cvxopt_qp')
        finally:
            # Always restore stdout and release both descriptors, even when
            # the solver raises.
            if suppressOut:
                os.dup2(oldstdout_fno, 1)
                os.close(oldstdout_fno)
                devnull.close()
        lim = 1e-4
        r.xf[where(abs(r.xf)<lim)] = 0
        self.__lambdas__ = r.xf
        nonzeroindexes = where(r.xf>lim)[0]
        self.numSupportVectors = len(nonzeroindexes)
Example #2
def select(condlist, choicelist, default=0):
    """Return an array composed of different elements of choicelist
    depending on the list of conditions.

    condlist is a list of condition arrays containing ones or zeros

    choicelist is a list of choice arrays (of the "same" size as the
    arrays in condlist).  The result array has the "same" size as the
    arrays in choicelist.  If condlist is [c0, ..., cN-1] then choicelist
    must be of length N.  The elements of the choicelist can then be
    represented as [v0, ..., vN-1]. The default choice if none of the
    conditions are met is given as the default argument.

    The conditions are tested in order and the first one satisfied is
    used to select the choice. In other words, the elements of the
    output array are found from the following tree (notice the order of
    the conditions matters):

    if c0: v0
    elif c1: v1
    elif c2: v2
    ...
    elif cN-1: vN-1
    else: default

    Note that one of the condition arrays must be large enough to handle
    the largest array in the choice list.

    Raises ValueError when condlist and choicelist differ in length.
    The caller's choicelist is left unmodified.
    """
    n = len(condlist)
    n2 = len(choicelist)
    if n2 != n:
        raise ValueError("list of cases must be same length as list of conditions")
    # Slot 0 holds the default; build a new list so the argument is not
    # mutated (the previous in-place insert grew the caller's list).
    choices = [default] + list(choicelist)
    # S becomes, per element, the 1-based index of the first true condition
    # (0 when none is true); pfac masks out elements already claimed.
    S = 0
    pfac = 1
    for k in range(1, n+1):
        S += k * pfac * asarray(condlist[k-1])
        if k < n:
            pfac *= (1-asarray(condlist[k-1]))
    # handle special case of a 1-element condition but
    #  a multi-element choice
    if type(S) in ScalarType or max(asarray(S).shape)==1:
        pfac = asarray(1)
        for k in range(n2+1):
            pfac = pfac + asarray(choices[k])
        if type(S) in ScalarType:
            S = S*ones(asarray(pfac).shape, type(S))
        else:
            S = S*ones(asarray(pfac).shape, S.dtype)
    return choose(S, tuple(choices))
Example #3
Example #4
def polyint(p, m=1, k=None):
    """Return the mth analytical integral of the polynomial p.

    If k is None, then zero-valued constants of integration are used.
    otherwise, k should be a list of length m (or a scalar if m=1) to
    represent the constants of integration to use for each integration
    (starting with k[0])

    A poly1d input yields a poly1d result; any other input yields an
    array of coefficients (or p itself when m == 0).

    Raises ValueError when m is negative or k has too few entries.
    """
    m = int(m)
    if m < 0:
        raise ValueError("Order of integral must be positive (see polyder)")
    if k is None:
        k = NX.zeros(m, float)
    k = atleast_1d(k)
    if len(k) == 1 and m > 1:
        # A single constant is reused for every integration step.
        k = k[0]*NX.ones(m, float)
    if len(k) < m:
        raise ValueError(
              "k must be a scalar or a rank-1 array of length 1 or >m.")
    if m == 0:
        return p
    else:
        truepoly = isinstance(p, poly1d)
        p = NX.asarray(p)
        # Integrate once: divide each coefficient by its new power and
        # append the constant of integration, then recurse for the rest.
        y = NX.zeros(len(p)+1, float)
        y[:-1] = p*1.0/NX.arange(len(p), 0, -1)
        y[-1] = k[0]
        val = polyint(y, m-1, k=k[1:])
        if truepoly:
            val = poly1d(val)
        return val
Example #5
def smooth(x,window_len=11,window='hanning'):
    """smooth the data using a window with requested size.

    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal
    (with the window size) in both ends so that transient parts are minimized
    in the beginning and end part of the output signal.

    input:
        x: the input signal
        window_len: the dimension of the smoothing window; should be an odd integer
        window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
            flat window will produce a moving average smoothing.

    output:
        the smoothed signal (same length as x); x itself, converted to an
        array, when window_len < 3

    raises:
        ValueError for non 1-D input, input shorter than window_len, or an
        unknown window name

    see also:

    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve

    TODO: the window parameter could be the window itself if an array instead of a string
    """
    import numpy
    x = numpy.array(x)

    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len<3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # NOTE(review): the padding / window / convolution statements were
    # missing from this function (only the final slice survived). They are
    # reconstructed so that the surviving slice returns len(x) samples:
    # window_len-1 point-reflected samples are added on each side.
    head = 2*x[0] - x[window_len-1:0:-1]
    tail = 2*x[-1] - x[-2:-window_len-1:-1]
    s = numpy.r_[head, x, tail]

    if window == 'flat': #moving average
        w = numpy.ones(window_len, 'd')
    else:
        # The name was validated above, so this only resolves a numpy window.
        w = getattr(numpy, window)(window_len)

    y = numpy.convolve(w/w.sum(), s, mode='same')
    return y[window_len-1:-window_len+1]
Example #6
 def lpc(self, x0 = None, X=None, weights = None):
   ''' Will return the scaled curve if self._lpcParameters['scaled'] = True, to return the curve on the same scale as the originally input data, call getCurve with unscale = True
   x0 : 2-dim numpy.array containing #rows equal to number of explicitly defined start points
   and #columns equal to dimension of the feature space points; seeds for the start points algorithm
   X : 2-dim numpy.array containing #rows equal to number of data points and #columns equal to dimension
   of the feature space points
   weights : see self._followxSingleDirection docs

   Returns the list of curves, one per row of self.x0, as produced by self._followx.
   Raises ValueError if no data points are available or weights has the wrong shape.
   '''
   if X is None:
     if self.Xi is None:
       raise ValueError('Data points have not yet been set in this LPCImpl instance. Either supply as X parameter to this function or call setDataPoints')
   else:
     # NOTE(review): the branch consuming X was missing here; setDataPoints
     # is the setter named by the error message above - confirm its signature.
     self.setDataPoints(X)
   N = self.Xi.shape[0]
   if self._lpcParameters['binary'] or weights is None:
     self._weights = ones(N, dtype = float)
   else:
     self._weights = array(weights, dtype = float)
     # (N,) is the 1-tuple shape; the former comparison against the int (N)
     # was always unequal, so any supplied weights raised.
     if self._weights.shape != (N,):
       raise ValueError('Weights must be one dimensional of vector of weights with size equal to the sample size')
   # NOTE(review): nothing visible here consumes the x0 argument; self.x0 is
   # read below, so start-point selection presumably happens elsewhere - confirm.
   #TODO add initialization relevant for other branches
   m = self.x0.shape[0] #how many starting points were actually generated
   way = self._lpcParameters['way']
   self._curve = [self._followx(self.x0[j], way = way, weights = self._weights) for j in range(m)]
   return self._curve
Example #7
def gmmEM(data, K, it,show=False,usekmeans=True):
    """Run `it` EM iterations of a K-component Gaussian mixture on `data`.

    data: 2-D array with one sample per row.
    K: number of mixture components.
    it: number of EM iterations to perform.
    show: when true, gmm.draw(data) is called before the loop and after
        every iteration.
    usekmeans: when true the initial centroids come from kmeans2,
        otherwise they are drawn at random between min(data) and max(data).

    Returns the GaussianMM object after the last iteration.
    Raises ZeroDivisionError when a responsibility normaliser is zero.
    """
    #data += finfo(float128).eps*100
    centroid = kmeans2(data, K)[0] if usekmeans else ((max(data) - min(data))*random_sample((K,data.shape[1])) + min(data))
    N = data.shape[0]
    gmm = GaussianMM(centroid)
    if show: gmm.draw(data)
    while it > 0:
        print("%s  iterations remaining" % (it,))
        it = it - 1
        # e-step: weighted density of every sample under every component,
        # normalised per sample
        gausses = zeros((K, N), dtype = data.dtype)
        for k in range(0, K):
            gausses[k] = gmm.c[k]*mulnormpdf(data, gmm.mean[k], gmm.covm[k])
        sums = sum(gausses, axis=0)
        if count_nonzero(sums) != sums.size:
            # A string is not a valid exception object; raise a real one.
            raise ZeroDivisionError("Divide by Zero")
        gausses /= sums
        # m step
        sg = sum(gausses, axis=1)
        if count_nonzero(sg) != sg.size:
            raise ZeroDivisionError("Divide by Zero")
        gmm.c = ones(sg.shape) / N * sg
        for k in range(0, K):
            gmm.mean[k] = sum(data * gausses[k].reshape((-1,1)), axis=0) / sg[k]
            d = data - gmm.mean[k]
            d1 = d.transpose()*gausses[k]
            # NOTE(review): d1 is never used - the covariance update for
            # gmm.covm[k] looks like it is missing here; confirm.
        if show: gmm.draw(data)
    return gmm
Example #8
def vander(x, N=None):
    """
    Generate a Van der Monde matrix.

    The columns of the output matrix are decreasing powers of the input
    vector.  Specifically, the i-th output column is the input vector to
    the power of ``N - i - 1``. Such a matrix with a geometric progression
    in each row is named Van Der Monde, or Vandermonde matrix, from
    Alexandre-Theophile Vandermonde.

    Parameters
    ----------
    x : array_like
        1-D input array.
    N : int, optional
        Order of (number of columns in) the output. If `N` is not specified,
        a square array is returned (``N = len(x)``).

    Returns
    -------
    out : ndarray
        Van der Monde matrix of order `N`.  The first column is ``x^(N-1)``,
        the second ``x^(N-2)`` and so forth.

    References
    ----------
    .. [1] Wikipedia, "Vandermonde matrix"

    Examples
    --------
    >>> x = np.array([1, 2, 3, 5])
    >>> N = 3
    >>> np.vander(x, N)
    array([[ 1,  1,  1],
           [ 4,  2,  1],
           [ 9,  3,  1],
           [25,  5,  1]])

    >>> np.column_stack([x**(N-1-i) for i in range(N)])
    array([[ 1,  1,  1],
           [ 4,  2,  1],
           [ 9,  3,  1],
           [25,  5,  1]])

    >>> x = np.array([1, 2, 3, 5])
    >>> np.vander(x)
    array([[  1,   1,   1,   1],
           [  8,   4,   2,   1],
           [ 27,   9,   3,   1],
           [125,  25,   5,   1]])

    """
    x = asarray(x)
    if N is None:
        N = len(x)
    # Start from all ones: the last column (x**0) is already correct.
    X = ones((len(x), N), x.dtype)
    for i in range(N - 1):
        X[:, i] = x ** (N - i - 1)
    return X
Example #9
Example #10
 def train(self, data, labels):
     """Fit a linear SVM by solving its dual QP with the 'cvxopt_qp' solver.

     The QP is built from the Gram matrix of the label-scaled samples,
     a linear term of -1, bounds 0 <= x <= self.C and the equality
     labels . x = 0. Stores the weight vector in self.w, the bias in
     self.w0 and the number of support vectors in self.numSupportVectors.
     """
     column_labels = labels.reshape((-1,1))
     signed_samples = data * column_labels
     gram = dot(signed_samples, transpose(signed_samples))
     label_shape = labels.shape
     linear_term = -1.0*ones(label_shape)
     lower = zeros(label_shape)
     upper = self.C * ones(label_shape)
     problem = QP(matrix(gram), linear_term.tolist(),
                  lb=lower.tolist(), ub=upper.tolist(),
                  Aeq=labels.tolist(), beq=0.0)
     result = problem.solve('cvxopt_qp')
     # Multipliers below 1e-3 are treated as numerical noise.
     result.xf[where(result.xf<1e-3)] = 0
     self.w = dot(result.xf*labels, data)
     support = where(result.xf>1e-4)[0]
     # The bias is recovered from the first support vector.
     anchor = support[0]
     self.w0 = 1.0/labels[anchor]-dot(self.w, data[anchor])
     self.numSupportVectors = len(support)
Example #11
def hamming(M):
    """hamming(M) returns the M-point Hamming window.

    Returns an empty array for M < 1 and a single 1.0 for M == 1.
    """
    if M < 1:
        return array([])
    if M == 1:
        # Avoids the division by M - 1 == 0 below.
        return ones(1, float)
    n = arange(0, M)
    return 0.54 - 0.46 * cos(2.0 * pi * n / (M - 1))
Example #12
    def readFile(self, filePath, fileSize):
            # Purpose (from the visible statements): read `fileSize` lines from
            # the text file at `filePath` and return a (fileSize, 3) float64
            # array. Column 0 keeps its initial value 1 (bias); columns 1 and 2
            # receive the two fixed-width fields sliced out of each line.
            #
            # NOTE(review): this block looks truncated. The next five lines read
            # like docstring text whose quotes are missing, the `try:` matching
            # the final `except IOError:` is not visible, the `open(` call is
            # cut off and the handler has no body. Confirm against the original.
            file format
            index1    xx.xxx    yy.yyy
            index2    xx.xxx    yy.yyy
            index3    xx.xxx    yy.yyy
            we eliminate the 'index' and extract two values into a set.

            __fpath = filePath
            # path of the file
            __size = fileSize
            # number of data items in file

            # these values may subject to change depending on the data.txt format
            # (character offsets of the two value fields inside a line)
            __firstValStart = 2
            __firstValEnd = 12
            __secondValStart = 13
            __secondValEnd = 22

            from numpy import float64

            # 3 columns. bias value, first value, second value

            __array = ones((__size, 3), float64)

            # NOTE(review): the file handle is never closed in the visible code.
            f = open(__fpath,

            print('reading data from file....')
            for i in range(0, __size):

                line = f.readline()
                __firstValue = line[__firstValStart:__firstValEnd]
                __secondValue = line[__secondValStart:__secondValEnd]

                # The string slices are converted to float64 on assignment.
                __array[i, 1] = __firstValue
                __array[i, 2] = __secondValue

            print('data reading complete....')

            return __array

        except IOError:
Example #13
def tranNBO(trainMatrix,trainCategory):
    """Train a two-class naive Bayes model on word-count vectors.

    trainMatrix: sequence of equal-length word vectors, one per document.
    trainCategory: sequence of 0/1 class labels, one per document.

    Returns (p0Vect, p1Vect, pAbusive): the log conditional word
    probabilities for class 0 and class 1, and the prior probability of
    class 1.
    """
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    pAbusive = sum(trainCategory)/float(numTrainDocs) # prior probability of class 1
    # Counts start at 1 (and denominators at 2) so that an unseen word never
    # produces a zero conditional probability.
    p0Num = ones(numWords)
    p1Num = ones(numWords)
    p0Denom = 2.0
    p1Denom = 2.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            # Without this branch every class-1 document was counted for both
            # classes and class-0 documents were ignored.
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    p1Vect = log(p1Num/p1Denom) # log P(word | class 1)
    p0Vect = log(p0Num/p0Denom) # log P(word | class 0)
    return p0Vect,p1Vect,pAbusive
Example #14
def blackman(M):
    """blackman(M) returns the M-point Blackman window.

    Returns an empty array for M < 1 and a single 1.0 for M == 1.
    """
    if M < 1:
        return array([])
    if M == 1:
        # Avoids the division by M - 1 == 0 below.
        return ones(1, float)
    n = arange(0,M)
    return 0.42-0.5*cos(2.0*pi*n/(M-1)) + 0.08*cos(4.0*pi*n/(M-1))
Example #16
def bartlett(M):
    """bartlett(M) returns the M-point Bartlett window.

    Returns an empty array for M < 1 and a single 1.0 for M == 1.
    """
    if M < 1:
        return array([])
    if M == 1:
        # Avoids the division by M - 1 == 0 below.
        return ones(1, float)
    n = arange(0,M)
    # Rising ramp up to the midpoint, falling ramp after it.
    return where(less_equal(n,(M-1)/2.0),2.0*n/(M-1),2.0-2.0*n/(M-1))
Example #17
 def __init__(self,
              x0=array([25 * ones(10)]),
     # NOTE(review): the signature looks truncated - the parameters used below
     # (policy, predictionAgent, lr, gamma, learner) and the closing "):" are
     # not visible. Also note that the x0 default array is created once, when
     # the function is defined.
     # Forwards policy, predictionAgent, lr and x0 to the parent initialiser,
     # then stores gamma and the result of self.inicLearner(learner).
     super(RateAgent, self).__init__(policy, predictionAgent, lr=lr, x0=x0)
     self.gamma = gamma
     self.learner = self.inicLearner(learner)
Example #18
 def readFile(self, filePath,fileSize):
         # Purpose (from the visible statements): read `fileSize` lines from
         # the text file at `filePath` into a (fileSize, 3) float64 array whose
         # column 0 keeps its initial value 1 (bias), and return that array.
         #
         # NOTE(review): this block looks truncated. The next five lines read
         # like docstring text whose quotes are missing, the `try:` matching
         # the final `except IOError:` is not visible, the slice bounds
         # (__firstValStart, __firstValEnd) and __secondValue are never
         # assigned, and the handler has no body. Confirm against the original.
         file format
         index1    xx.xxx    yy.yyy
         index2    xx.xxx    yy.yyy
         index3    xx.xxx    yy.yyy
         we eliminate the 'index' and extract two values into a set.
         __fpath=filePath; # path of the file
         __size=fileSize; # number of data items in file
         # these values may subject to change depending on the data.txt format
         from numpy import float64
         # 3 columns. bias value, first value, second value 
         __array= ones((__size,3),float64);
         # Opened line-buffered for reading; the handle is never closed in the
         # visible code.
         f=open(__fpath, mode='r', buffering=1, encoding=None, errors=None, newline=None, closefd=True); 
         print('reading data from file....');
         for i in range(0,__size):
             line= f.readline();
             __firstValue= line[__firstValStart:__firstValEnd];
             __array[i,1]= __firstValue
             __array[i,2]= __secondValue;
         print('data reading complete....');
         return __array;
     except IOError:
Example #19
 def __train__(self, data, labels):
     """Fit a linear SVM by solving its dual QP with stdout silenced.

     The QP is built from the Gram matrix of the label-scaled samples,
     a linear term of -1, bounds 0 <= x <= self.C and the equality
     labels . x = 0, and solved with the 'cvxopt_qp' solver while file
     descriptor 1 is redirected to the null device.

     Side effects: sets self.w (weight vector), self.w0 (bias, taken from
     the first support vector) and self.numSupportVectors.
     """
     l = labels.reshape((-1,1))
     xy = data * l
     H = dot(xy,transpose(xy))
     f = -1.0*ones(labels.shape)
     lb = zeros(labels.shape)
     ub = self.C * ones(labels.shape)
     Aeq = labels
     beq = 0.0
     # Keep a duplicate of the real stdout so it can be restored afterwards.
     devnull = open(os.devnull, 'w')
     oldstdout_fno = os.dup(sys.stdout.fileno())
     os.dup2(devnull.fileno(), 1)
     try:
         p = QP(matrix(H),f.tolist(),lb=lb.tolist(),ub=ub.tolist(),Aeq=Aeq.tolist(),beq=beq)
         r = p.solve('cvxopt_qp')
     finally:
         # Always restore stdout and release both descriptors, even when the
         # solver raises (previously they leaked on every call).
         os.dup2(oldstdout_fno, 1)
         os.close(oldstdout_fno)
         devnull.close()
     lim = 1e-4
     r.xf[where(r.xf<lim)] = 0
     self.w = dot(r.xf*labels,data)
     nonzeroindexes = where(r.xf>lim)[0]
     l1 = nonzeroindexes[0]
     self.w0 = 1.0/labels[l1]-dot(self.w,data[l1])
     self.numSupportVectors = len(nonzeroindexes)
Example #20
def logisticRegression(trainData, trainLabels, testData, testLabels):
    """Train a regularised logistic model and return its test accuracy.

    A column of ones (the free parameter) is appended to both data sets.
    The weights are then updated with
    ``w <- w - inv(alpha*I + X'RX) . (X'(y - t) + alpha*w)`` until they
    stop changing or 100 iterations have run, where ``y`` comes from
    ``yVector`` and ``R`` from ``R(y)``.

    Returns the fraction of test samples whose thresholded prediction
    (``yScalar(w, x) >= 0.5`` means class 1) matches its label.
    """
    #adjust the data, adding the 'free parameter' to the train data
    trainDataWithFreeParam = hstack((trainData.copy(), ones(trainData.shape[0])[:,newaxis]))
    testDataWithFreeParam = hstack((testData.copy(), ones(testData.shape[0])[:,newaxis]))
    alpha = 10
    oldW = zeros(trainDataWithFreeParam.shape[1])
    newW = ones(trainDataWithFreeParam.shape[1])
    iteration = 0
    trainDataWithFreeParamTranspose = transpose(trainDataWithFreeParam)
    alphaI = alpha * identity(oldW.shape[0])
    while not array_equal(oldW, newW):
        if iteration == 100:
            # NOTE(review): the body of this check was missing; stopping the
            # loop is the only reading consistent with an iteration cap.
            break
        oldW = newW.copy()
        yVect = yVector(oldW, trainDataWithFreeParam)
        r = R(yVect)

        firstTerm = inv(alphaI + dot(dot(trainDataWithFreeParamTranspose, r), trainDataWithFreeParam))
        secondTerm = dot(trainDataWithFreeParamTranspose, (yVect-trainLabels)) + alpha * oldW
        newW = oldW - dot(firstTerm, secondTerm)
        iteration += 1
    #see how well we did
    numCorrect  = 0
    for x,t in zip(testDataWithFreeParam, testLabels):
        if yScalar(newW, x) >= 0.5:
            if t == 1:
                numCorrect += 1
        else:
            # Without this branch label-0 samples were scored as correct
            # only when the model predicted class 1.
            if t == 0:
                numCorrect += 1
    return float(numCorrect) / float(len(testLabels))
Example #21
Example #22
Example #23
def stocGradAscent0(dataMatrix, classLabels):
    """Run one pass of stochastic gradient ascent over the samples.

    Starting from all-ones weights, each row in turn nudges the weights
    by 0.01 * (label - sigmoid(row . weights)) * row.

    Returns the weight vector after the last row.
    """
    numSamples, numFeatures = np.shape(dataMatrix)
    stepSize = 0.01
    coeffs = ones(numFeatures)
    sampleIdx = 0
    while sampleIdx < numSamples:
        # Prediction for this sample with the current weights.
        prediction = sigmoid(sum(dataMatrix[sampleIdx] * coeffs))
        residual = classLabels[sampleIdx] - prediction
        coeffs = coeffs + stepSize * residual * dataMatrix[sampleIdx]
        sampleIdx += 1
    return coeffs
Example #24
def problem1(data, figureDir, figureName, targetValue):
    # Purpose (from the visible statements): for l = 0..150 compute weights
    # with doubleU and the train/test MSE, then plot both curves together
    # with a constant target line and annotate the best l.
    #
    # NOTE(review): this block looks truncated. The two `if` statements below
    # have no bodies, trainList/testList are never appended to inside the
    # loop, `series` is not defined in this function, the two consecutive
    # annotateOffset assignments suggest a lost `else:`, and the final
    # plt.annotate(...) call is cut off. Confirm against the original.
    figureOutLoc = os.path.join(figureDir, '1', figureName + ".eps")
    if os.path.exists(figureOutLoc):
    if not os.path.exists(os.path.dirname(figureOutLoc)):
    trainList = []
    testList = []
    for l in range(151):
        w = doubleU(phi(data[TRAIN]), l, tListToTVector(data[TRAIN_LABELS]))
        trainMSE = MSE(data[TRAIN], w, data[TRAIN_LABELS])
        testMSE = MSE(data[TEST], w, data[TEST_LABELS])
    trainArray = squeeze(row_stack(trainList))
    testArray = squeeze(row_stack(testList))
    #find the best l value on the test set
    targetArray = targetValue * ones(151, dtype=numpy.float64)
    targetDiffArray = testArray - targetArray
    # NOTE(review): argmin of the signed difference picks the lowest test MSE,
    # not the value closest to the target - confirm this is intended.
    bestL = argmin(targetDiffArray)
    lArray = arange(151).reshape(-1)

    plt.plot(lArray, trainArray, '-', label="Train")
    plt.plot(lArray, testArray, '--', label="Test")
    plt.plot(lArray, targetArray, ':', label="Target")
    # Cap the upper y limit at 7.0.
    plt.ylim(ymax = min((plt.ylim()[1], 7.0)))
    #add a label showing the min value, and annotate it's lvalue
    if series == 'wine':
        annotateOffset = .02
        annotateOffset = .2
    plt.annotate("Best lambda value = " + str(bestL) + " MSE = %.3f" %testList[bestL], 
                 xy=(bestL, testArray[bestL]), 
                 xytext=(bestL + 10, testArray[bestL] - annotateOffset),
                 bbox=dict(boxstyle="round", fc="0.8"), 
Example #25
Example #26
Example #27
def roots(p):
    """ Return the roots of the polynomial coefficients in p.

        The values in the rank-1 array p are coefficients of a polynomial.
        If the length of p is n+1 then the polynomial is
        p[0] * x**n + p[1] * x**(n-1) + ... + p[n-1]*x + p[n]

        Returns an array of roots (empty for an all-zero or constant
        polynomial); roots at zero are appended last.
        Raises ValueError if p is not rank-1.
    """
    # If input is scalar, this makes it an array
    p = atleast_1d(p)
    if len(p.shape) != 1:
        raise ValueError("Input must be a rank-1 array.")

    # find non-zero array entries
    non_zero = NX.nonzero(NX.ravel(p))[0]

    # Return an empty array if polynomial is all zeros
    if len(non_zero) == 0:
        return NX.array([])

    # find the number of trailing zeros -- this is the number of roots at 0.
    trailing_zeros = len(p) - non_zero[-1] - 1

    # strip leading and trailing zeros
    p = p[int(non_zero[0]):int(non_zero[-1])+1]

    # casting: if incoming array isn't floating point, make it floating point.
    if not issubclass(p.dtype.type, (NX.floating, NX.complexfloating)):
        p = p.astype(float)

    N = len(p)
    if N > 1:
        # build companion matrix and find its eigenvalues (the roots)
        A = diag(NX.ones((N-2,), p.dtype), -1)
        A[0, :] = -p[1:] / p[0]
        found = _eigvals(A)
    else:
        # A constant polynomial has no non-zero roots. Without this branch
        # the eigenvalues computed above were always overwritten.
        found = NX.array([])

    # tack any zeros onto the back of the array
    found = hstack((found, NX.zeros(trailing_zeros, found.dtype)))
    return found
Example #28
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Stochastic gradient ascent with a decaying step and random order.

    dataMatrix: 2-D array, one sample per row.
    classLabels: sequence of labels, one per sample.
    numIter: number of full passes over the data.

    In every pass each sample is visited exactly once in random order.
    The step size 4 / (1 + j + i) + 0.01 shrinks with the pass number j
    and the position i inside the pass but never reaches zero.

    Returns the weight vector (all ones when numIter is 0).
    """
    m, n = np.shape(dataMatrix)
    weights = ones(n)
    # The number of passes is numIter; the loop previously ran m times and
    # ignored the parameter.
    for j in range(numIter):
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = int(np.random.uniform(0, len(dataIndex)))
            # Map the random position to a not-yet-used sample; indexing the
            # data with the raw position defeated sampling without replacement.
            sampleIndex = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[sampleIndex] * weights))
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * dataMatrix[sampleIndex]
            del dataIndex[randIndex]
    return weights
Example #29
def problem2(data, figureDir, dataName, l, maxNumRepetitions, minSampleSize, targetValue):
    # Purpose (from the visible statements): for each repetition and each
    # sample size from minSampleSize up to the full training set, fit weights
    # with doubleU on a subset and evaluate the MSE on the whole test set;
    # then plot the mean MSE per sample size against a constant target line
    # and save the figure.
    #
    # NOTE(review): this block looks truncated. The first `if` has no body,
    # the index list is never shuffled although the comments say it is, and
    # curSampleMSE is never stored in MSEValues. Confirm against the original.
    if os.path.exists(getProblem2FigureLoc(figureDir, dataName, l, maxNumRepetitions)):
        #this implies that all figures before it have been created already, so we don't need to repeat them
    # One (initially empty) list of MSE values per sample size.
    MSEValues = [[] for x in xrange(minSampleSize, len(data[TRAIN]) + 1)]
    sampleSizeValueList = range(minSampleSize, len(data[TRAIN]) + 1, 1)
    sampleSizeValueArray = array(sampleSizeValueList)
    targetArray = targetValue * ones(len(sampleSizeValueList), dtype=numpy.float64)
    for repeatNum in range(1, maxNumRepetitions+1):
        #randomly choose ordering of the samples for this run
        #make the range of indexes, then shuffle them into a random order
        randomlySortedIndexes = range(len(data[TRAIN]))
        #start with a sample size of one, go to the total training set
        for sampleSizeIndex, sampleSize in enumerate(sampleSizeValueList):
            # The first `sampleSize` indexes select the training subset.
            curSampleIndexesList = randomlySortedIndexes[:sampleSize]
            curTrainSample = selectSample(data[TRAIN], curSampleIndexesList)
            curTrainLabelSample = selectSample(data[TRAIN_LABELS], curSampleIndexesList)
            w = doubleU(phi(curTrainSample), l, tListToTVector(curTrainLabelSample))
            curSampleMSE = MSE(data[TEST], w, data[TEST_LABELS])
    #have a sample size of 0 is meaningless
    curRepeatMeanMSEValues = array([mean(array(x, dtype=numpy.float64)) for x in MSEValues])
    plt.plot(sampleSizeValueArray, curRepeatMeanMSEValues, '-', label="Learning curve")
    plt.plot(sampleSizeValueArray, targetArray, '--', label="Target MSE")
    plt.title("lamba = " + str(l) + " - " + str(repeatNum) + " repetitions")
    plt.xlabel("Sample Size - minimum " + str(minSampleSize))
    plt.ylabel("MSE on Full Test Set")
    # NOTE(review): the x axis is sample size but its lower limit is derived
    # from targetValue (an MSE) - confirm this is intended.
    plt.xlim(xmin=targetValue - .5)
    plt.savefig(getProblem2FigureLoc(figureDir, dataName, l, repeatNum))
Example #30
def mask_indices(n, mask_func, k=0):
    """
    Return the indices to access (n, n) arrays, given a masking function.

    Assume `mask_func` is a function that, for a square array a of size
    ``(n, n)`` with a possible offset argument `k`, when called as
    ``mask_func(a, k)`` returns a new array with zeros in certain locations
    (functions like `triu` or `tril` do precisely this). Then this function
    returns the indices where the non-zero values would be located.

    Parameters
    ----------
    n : int
        The returned indices will be valid to access arrays of shape (n, n).
    mask_func : callable
        A function whose call signature is similar to that of `triu`, `tril`.
        That is, ``mask_func(x, k)`` returns a boolean array, shaped like `x`.
        `k` is an optional argument to the function.
    k : scalar
        An optional argument which is passed through to `mask_func`. Functions
        like `triu`, `tril` take a second argument that is interpreted as an
        offset.

    Returns
    -------
    indices : tuple of arrays.
        The `n` arrays of indices corresponding to the locations where
        ``mask_func(np.ones((n, n)), k)`` is True.

    See Also
    --------
    triu, tril, triu_indices, tril_indices

    Notes
    -----
    .. versionadded:: 1.4.0

    Examples
    --------
    These are the indices that would allow you to access the upper triangular
    part of any 3x3 array:

    >>> iu = np.mask_indices(3, np.triu)

    For example, if `a` is a 3x3 array:

    >>> a = np.arange(9).reshape(3, 3)
    >>> a
    array([[0, 1, 2],
           [3, 4, 5],
           [6, 7, 8]])
    >>> a[iu]
    array([0, 1, 2, 4, 5, 8])

    An offset can be passed also to the masking function.  This gets us the
    indices starting on the first diagonal right of the main one:

    >>> iu1 = np.mask_indices(3, np.triu, 1)

    with which we now extract only three elements:

    >>> a[iu1]
    array([1, 2, 5])

    """
    # Mask an all-ones array: whatever survives marks the kept positions.
    m = ones((n, n), int)
    a = mask_func(m, k)
    return nonzero(a != 0)
Example #31
Example #32
def vander(x, N=None):
    """
    Generate a Van der Monde matrix.

    The columns of the output matrix are decreasing powers of the input
    vector.  Specifically, the `i`-th output column is the input vector
    raised element-wise to the power of ``N - i - 1``.  Such a matrix with
    a geometric progression in each row is named for Alexandre-Theophile
    Vandermonde.

    Parameters
    ----------
    x : array_like
        1-D input array.
    N : int, optional
        Order of (number of columns in) the output.  If `N` is not specified,
        a square array is returned (``N = len(x)``).

    Returns
    -------
    out : ndarray
        Van der Monde matrix of order `N`.  The first column is ``x^(N-1)``,
        the second ``x^(N-2)`` and so forth.

    Examples
    --------
    >>> x = np.array([1, 2, 3, 5])
    >>> N = 3
    >>> np.vander(x, N)
    array([[ 1,  1,  1],
           [ 4,  2,  1],
           [ 9,  3,  1],
           [25,  5,  1]])

    >>> np.column_stack([x**(N-1-i) for i in range(N)])
    array([[ 1,  1,  1],
           [ 4,  2,  1],
           [ 9,  3,  1],
           [25,  5,  1]])

    >>> x = np.array([1, 2, 3, 5])
    >>> np.vander(x)
    array([[  1,   1,   1,   1],
           [  8,   4,   2,   1],
           [ 27,   9,   3,   1],
           [125,  25,   5,   1]])

    The determinant of a square Vandermonde matrix is the product
    of the differences between the values of the input vector:

    >>> np.linalg.det(np.vander(x))
    48.000000000000043
    >>> (5-3)*(5-2)*(5-1)*(3-2)*(3-1)*(2-1)
    48

    """
    x = asarray(x)
    if N is None:
        # Default to a square matrix.
        N = len(x)
    # Start from all ones: the last column (x**0) is already correct.
    X = ones( (len(x),N), x.dtype)
    for i in range(N - 1):
        X[:,i] = x**(N - i - 1)
    return X
Example #33
Example #34
Example #35
def masked_all_like(arr):
    """Return an empty masked array with the shape and dtype of `arr`
    in which every element is masked."""
    # An all-True mask hides the uninitialised storage completely.
    full_mask = numeric.ones(arr.shape, bool_)
    storage = numeric.empty_like(arr)
    return masked_array(storage, mask=full_mask)
Example #36
Example #37
Example #38
 def __train__(self, data, labels):
     # Stores in self.w the product of pseudoX with the labels reshaped to a
     # column vector.
     # NOTE(review): `o` and `pseudoX` are not defined in the visible code and
     # `h` is never used - the lines building them (presumably a column of
     # ones and a pseudo-inverse of h) appear to be missing; confirm.
     h=hstack((data, o))
     self.w=dot(pseudoX, labels.reshape((-1,1)))
Example #39
    def __init__(self, fm_train, N, max):
        """Build the initial HMM stored in ``self.m``.

        fm_train: training sequence of emitted values (counted with Counter
            and wrapped in an EmissionSequence).
        N: number of states; in the visible code it is only used to derive
            ``partition = int(max / N)``.
        max: size of the emission alphabet; the alphabet is
            ``IntegerRange(1, max + 1)``. The name shadows the builtin.

        NOTE(review): several constants are hard-coded (37 states, 264
        symbols, the 1:68 / 18..52 / 209:264 ranges), the emission vectors
        are computed but never appended to B, and nothing runs between the
        two timer reads - statements appear to be missing; confirm against
        the original source.
        """
        self.sigma = IntegerRange(1, max+1)
        A = []
        B = []
        #Fair transition matrix
        # (uniform 37x37 matrix: every entry is 1/37)
        A = ones([37,37])/(37*1.0)
#        print A
        # Not fair transition matrix
#        for i in range(N):
#            transition = []
#            for j in range(N):
#                if(i == j):
#                    transition.append(1.0/10000000)
#                else:
#                    transition.append(1.0/(N-1))
#            A.append(transition)   
        # now data are divided in order to obtain a starting emission values for each state.
        # number of partition basing on the number of states
#        npartition = int(len(fm_train)/N)
#        # for the first N-1 states
#        for i in range(N-1):
#            submatrix = fm_train[i*npartition:((1+i)*npartition)-1]
#            occurrences = Counter(submatrix)
#            emission = [0.0]*(max)
#            for key in occurrences.keys():
#                emission[key-1] = occurrences[key-1]
#            emission = array(emission) / (sum(emission))
#            # The matrix is too sparse so an adjustement is needed: all value equal to 0 will be set to 0.0001
#            # and the normal value will be adjusted in order to have th e sum of emission equal to 1!
#            numberofzeros = list(emission).count(0)
#            adjvalue = (numberofzeros * 0.0001) / (emission != 0).sum()
#            for i in range(0,len(emission)):
#                if emission[i] == 0:
#                    emission[i] = 0.0001
#                else:
#                    emission[i] -= adjvalue
#            B.append(list(emission))
#        # the last state is computed apart because it can have different number of values!
#        submatrix = fm_train[N-1*npartition:len(fm_train)-1]
#        occurrences = Counter(submatrix)
#        emission = [0.0]*(max)
#        print occurrences
#        for key in occurrences.keys():
#            emission[key-1] = occurrences[key-1]
#        emission = array(emission) / (sum(emission))
#        # The matrix is too sparse so an adjustement is needed: all value equal to 0 will be set to 0.0001
#        # and the normal value will be adjusted in order to have th e sum of emission equal to 1!
#        numberofzeros = list(emission).count(0)
#        adjvalue = numberofzeros * 0.0001 / (emission != 0).sum()
#        for i in range(0,len(emission)):
#            if emission[i] == 0:
#                emission[i] = 0.0001
#            else:
#                emission[i] -= adjvalue
#        B.append(list(emission))

        # The emission distribution is computed basing on the output range, each state has singl range value where the probability
        # is equal and the other values have 0.0001 prob percentage
        B = []
        partition = int(max/N)
#         get occurrences of each value
        occurrences_count = Counter(fm_train)
        occurrences_prob = zeros(264)
        # Raw counts per emitted value, indexed by the value itself.
        for i in occurrences_count.keys():
            occurrences_prob[i] = occurrences_count[i]
        # the first state is an outlier and it is treated separately
        emission = zeros(max)
        emission[1:68] = occurrences_prob[1:68]/sum(occurrences_prob[1:68])
        # NOTE(review): this emission vector is overwritten below without
        # being stored anywhere.
        for i in range(18,53):
            # Floor of 0.0001 everywhere; inside the state's own slice the
            # normalised counts are reduced by the mass given to the floor.
            emission = ones(max) * 0.0001
            emission[i*partition:(i+1)*partition] = (occurrences_prob[i*partition:(i+1)*partition]/sum(occurrences_prob[i*partition:(i+1)*partition])) - (((max - partition)*0.0001)/partition)
            print sum(emission)
        # last state is an outlier too
        emission = zeros(max)
        emission[209:264] = occurrences_prob[209:264]/sum(occurrences_prob[209:264])
#        # Adjusting the NaN values of the emission matrix
#        for i in range(N):
#            if(any(numpy.isnan(B[i]))):
#                B[i]= list(ones(max) / max)
#        adj_prob = (1-((max-partition)*0.0001))/partition
#        for i in range(N):
#            emission = ones(max)*0.0001
#            emission[i*partition:(i+1)*partition] = adj_prob
#            print sum(emission)
#            B.append(emission)
        print "B = %s" % B
        # Uniform initial state distribution over 37 states.
        pi = [1.0/37]*37
        self.m = HMMFromMatrices(self.sigma, DiscreteDistribution(self.sigma), A, list(B), pi)
        train = EmissionSequence(self.sigma, fm_train)
        # NOTE(review): `train` is not used and nothing runs between the two
        # timer reads, so the printed time measures no work.
        trainstart = time.time()
        trainend = time.time()
        print 'HMM train time'
        print trainend - trainstart
Example #40
def average(a, axis=None, weights=None, returned=False):
    """Average the array over the given axis.  If the axis is None,
    average over all dimensions of the array.  Equivalent to
    a.mean(axis) and to

      a.sum(axis) / size(a, axis)

    If weights are given, result is:
        sum(a * weights,axis) / sum(weights,axis),
    where the weights must have a's shape or be 1D with length the
    size of a in the given axis. Integer weights are converted to
    Float.  Not specifying weights is equivalent to specifying
    weights that are all 1.

    If 'returned' is True, return a tuple: the result and the sum of
    the weights or count of values. The shape of these two results
    will be the same.

    Raises ZeroDivisionError if appropriate.  (The version in MA does
    not -- it returns masked values).
    Raises ValueError if the weights have an incompatible shape.
    """
    if axis is None:
        a = array(a).ravel()
        if weights is None:
            n = add.reduce(a)
            d = len(a) * 1.0
        else:
            w = array(weights).ravel() * 1.0
            n = add.reduce(multiply(a, w))
            d = add.reduce(w)
    else:
        a = array(a)
        if a.shape == ():
            # Treat a 0-d input as a one-element vector.
            a = a.reshape((1,))
        ash = a.shape
        if weights is None:
            n = add.reduce(a, axis)
            d = ash[axis] * 1.0
            if returned:
                d = ones(n.shape) * d
        else:
            w = array(weights) * 1.0
            if w.shape == ():
                w = w.reshape((1,))
            wsh = w.shape
            if wsh == ash:
                n = add.reduce(a * w, axis)
                d = add.reduce(w, axis)
            elif wsh == (ash[axis], ):
                # Place the 1-D weights along `axis` and broadcast them to
                # a's shape. The index needs one entry per dimension of a
                # (it used to get ash[axis] entries), and is applied directly
                # instead of through eval().
                r = [newaxis] * len(ash)
                r[axis] = slice(None, None, 1)
                w1 = w[tuple(r)] * ones(ash, float)
                n = add.reduce(a * w1, axis)
                d = add.reduce(w1, axis)
            else:
                raise ValueError('averaging weights have wrong shape')

    # Only a scalar denominator can be checked for zero unambiguously.
    if not isinstance(d, ndarray):
        if d == 0.0:
            raise ZeroDivisionError('zero denominator in average()')
    if returned:
        return n / d, d
    else:
        return n / d
Example #41
def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
    """histogramdd(sample, bins=10, range=None, normed=False, weights=None)

    Return the N-dimensional histogram of the sample.

    Parameters
    ----------
    sample : sequence or array
        Either an (N, D) array holding N points in D dimensions, or a
        sequence of D one-dimensional arrays of length N.
    bins : sequence or scalar
        A sequence of edge arrays, a sequence of bin counts, or a scalar
        which is the bin count for all dimensions. Default is 10.
    range : sequence
        A sequence of (lower, upper) outer bin edges, one pair per
        dimension. Only used for dimensions whose bins are given as a
        count. Default is [min, max] of the sample.
    normed : boolean
        If False, return the number of samples in each bin, if True,
        returns the density.
    weights : array
        Array of weights, one per sample point.  The weights are normed
        only if normed is True.  Should the sum of the weights not equal
        N, the total bin count will not be equal to the number of samples.

    Returns
    -------
    hist : array
        Histogram array with one axis per dimension of the sample.
    edges : list
        List of D arrays holding the bin edges used for each dimension.

    Raises
    ------
    AttributeError
        If `bins` is a sequence whose length differs from the number of
        dimensions of the sample.

    Example
    -------
    >>> x = random.randn(100,3)
    >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))

    """
    # NOTE: the `range` parameter shadows the builtin inside this function,
    # which is why every loop below iterates over `arange(...)` instead.
    try:
        # Sample is an ND-array.
        N, D = sample.shape
    except (AttributeError, ValueError):
        # Sample is a sequence of 1D arrays.
        sample = atleast_2d(sample).T
        N, D = sample.shape

    nbin = empty(D, int)
    edges = D * [None]
    dedges = D * [None]
    if weights is not None:
        weights = asarray(weights)

    try:
        M = len(bins)
        if M != D:
            raise AttributeError(
                'The dimension of bins must be a equal to the dimension of the sample x.')
    except TypeError:
        # A scalar bin count applies to every dimension.
        bins = D * [bins]

    # Select range for each dimension
    # Used only if number of bins is given.
    if range is None:
        smin = atleast_1d(array(sample.min(0), float))
        smax = atleast_1d(array(sample.max(0), float))
    else:
        smin = zeros(D)
        smax = zeros(D)
        for i in arange(D):
            smin[i], smax[i] = range[i]

    # Make sure the bins have a finite width.
    for i in arange(len(smin)):
        if smin[i] == smax[i]:
            smin[i] = smin[i] - .5
            smax[i] = smax[i] + .5

    # Create edge arrays
    for i in arange(D):
        if isscalar(bins[i]):
            nbin[i] = bins[i] + 2  # +2 for outlier bins
            edges[i] = linspace(smin[i], smax[i], nbin[i] - 1)
        else:
            edges[i] = asarray(bins[i], float)
            nbin[i] = len(edges[i]) + 1  # +1 for outlier bins
        dedges[i] = diff(edges[i])

    nbin = asarray(nbin)

    # Compute the bin number each sample falls into.
    Ncount = [digitize(sample[:, i], edges[i]) for i in arange(D)]

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right
    # edge to be counted in the last bin, and not as an outlier.
    for i in arange(D):
        # Rounding precision
        decimal = int(-log10(dedges[i].min())) + 6
        # Find which points are on the rightmost edge.
        on_edge = where(
            around(sample[:, i], decimal) == around(edges[i][-1], decimal))[0]
        # Shift these points one bin to the left.
        Ncount[i][on_edge] -= 1

    # Flattened histogram matrix (1D)
    hist = zeros(nbin.prod(), float)

    # Compute the sample indices in the flattened histogram matrix.
    # The flat layout orders the dimensions by increasing bin count (ni).
    ni = nbin.argsort()
    xy = zeros(N, int)
    for i in arange(0, D - 1):
        xy += Ncount[ni[i]] * nbin[ni[i + 1:]].prod()
    xy += Ncount[ni[-1]]

    # Compute the number of repetitions in xy and assign it to the flattened histmat.
    if len(xy) == 0:
        return zeros(nbin - 2, int), edges

    flatcount = bincount(xy, weights)
    hist[:len(flatcount)] = flatcount

    # Shape into a proper matrix.  Axis k of the reshaped array holds
    # dimension ni[k]; transposing by the inverse permutation puts dimension
    # d back on axis d for every possible ordering of the bin counts.
    hist = hist.reshape(tuple(nbin[ni]))
    hist = hist.transpose(tuple(ni.argsort()))

    # Remove outliers (indices 0 and -1 for each dimension).
    # A tuple is required: a list of slices is not a valid multi-axis index.
    core = tuple(D * [slice(1, -1)])
    hist = hist[core]

    # Normalize if normed is True
    if normed:
        s = hist.sum()
        for i in arange(D):
            shape = ones(D, int)
            shape[i] = nbin[i] - 2
            hist = hist / dedges[i].reshape(shape)
        hist /= s

    return hist, edges
Example #42
def masked_all(shape, dtype=float_):
    """Build a masked array of the requested shape and dtype in which
    every element is masked.

    The underlying data buffer is left uninitialised; only the mask
    (all True) is meaningful.
    """
    # Uninitialised storage: the values are irrelevant because all are masked.
    data = numeric.empty(shape, dtype)
    everything_masked = numeric.ones(shape, bool_)
    return masked_array(data, mask=everything_masked)
Example #43
Example #44
Example #45
def polyint(p, m=1, k=None):
    """
    Return an antiderivative (indefinite integral) of a polynomial.

    The returned order `m` antiderivative `P` of polynomial `p` satisfies
    :math:`\\frac{d^m}{dx^m}P(x) = p(x)` and is defined up to `m - 1`
    integration constants `k`. The constants determine the low-order
    polynomial part

    .. math:: \\frac{k_{m-1}}{0!} x^0 + \\ldots + \\frac{k_0}{(m-1)!}x^{m-1}

    of `P` so that :math:`P^{(j)}(0) = k_{m-j-1}`.

    Parameters
    ----------
    p : {array_like, poly1d}
        Polynomial to integrate.
        A sequence is interpreted as polynomial coefficients, see `poly1d`.
    m : int, optional
        Order of the antiderivative. (Default: 1)
    k : {None, list of `m` scalars, scalar}, optional
        Integration constants. They are given in the order of integration:
        those corresponding to highest-order terms come first.

        If ``None`` (default), all constants are assumed to be zero.
        If `m = 1`, a single scalar can be given instead of a list.

    Returns
    -------
    P : {ndarray, poly1d}
        Coefficients of the antiderivative; a `poly1d` when `p` is one.

    Raises
    ------
    ValueError
        If `m` is negative, or if fewer than `m` integration constants are
        supplied (a single constant is reused for every integration).

    See Also
    --------
    polyder : derivative of a polynomial
    poly1d.integ : equivalent method

    Examples
    --------
    The defining property of the antiderivative:

    >>> p = np.poly1d([1,1,1])
    >>> P = np.polyint(p)
    >>> P
    poly1d([ 0.33333333,  0.5       ,  1.        ,  0.        ])
    >>> np.polyder(P) == p
    True

    The integration constants default to zero, but can be specified:

    >>> P = np.polyint(p, 3)
    >>> P(0)
    0.0
    >>> np.polyder(P)(0)
    0.0
    >>> np.polyder(P, 2)(0)
    0.0
    >>> P = np.polyint(p, 3, k=[6,5,3])
    >>> P
    poly1d([ 0.01666667,  0.04166667,  0.16666667,  3. ,  5. ,  3. ])

    Note that 3 = 6 / 2!, and that the constants are given in the order of
    integrations. Constant of the highest-order polynomial term comes first:

    >>> np.polyder(P, 2)(0)
    6.0
    >>> np.polyder(P, 1)(0)
    5.0
    >>> P(0)
    3.0

    """
    m = int(m)
    if m < 0:
        raise ValueError("Order of integral must be positive (see polyder)")
    if k is None:
        k = NX.zeros(m, float)
    k = atleast_1d(k)
    if len(k) == 1 and m > 1:
        # A single constant is reused for every one of the m integrations.
        k = k[0] * NX.ones(m, float)
    if len(k) < m:
        raise ValueError(
              "k must be a scalar or a rank-1 array of length 1 or >m.")

    truepoly = isinstance(p, poly1d)
    p = NX.asarray(p)
    if m == 0:
        # Zeroth-order antiderivative: the polynomial itself.
        if truepoly:
            return poly1d(p)
        return p

    # Integrate once (divide each coefficient by its new power and append the
    # constant term), then recurse for the remaining m - 1 integrations.
    # Note: this must work also with object and integer arrays, hence the
    # explicit __truediv__ rather than the `/` operator.
    y = NX.concatenate((p.__truediv__(NX.arange(len(p), 0, -1)), [k[0]]))
    val = polyint(y, m - 1, k=k[1:])
    if truepoly:
        return poly1d(val)
    return val
Example #46
import sys
# NOTE(review): fragment of a larger script -- `fr`, `poi`, `parts`, `X` and
# `lie` are not defined in the visible code, and `line` is never used here.
for line in fr:
        # NOTE(review): the `except` below has no visible matching `try:`;
        # one presumably opened the loop body -- confirm against the source.
        # Only entries whose `poi` is not the string '-1' are stored.
        if poi!='-1':
            print('try parts[2]:',parts[2])
            # Store parts[1] in X at index (poi, lie).
            X[poi,lie] = parts[1]
    # NOTE(review): a bare `except` catches every exception, yet the handler
    # prints 'no error' -- verify that this message is intended.
    except :
        print('no error')
Example #47
Example #48
 def _followxSingleDirection(  self, 
                               direction = Direction.FORWARD,
                               forward_curve = None,
                               last_eigenvector = None, 
                               weights = 1.):
   '''Generates a partial lpc curve dictionary from the start point, x.

   NOTE(review): the body reads a start point `x` that is absent from the signature shown here, and several
   `else:`/`break` lines plus the closing brace of the result dictionary appear to be missing from this
   copy -- confirm against the upstream source before relying on it.

   x : 1-dim, length m, numpy.array of floats, start point for the algorithm when m is dimension of feature space
   direction : Direction.FORWARD or Direction.BACKWARD; it is used as a multiplicative sign on the first
       eigenvector and on the initial cosine against last_eigenvector
   forward_curve : dictionary as returned by this function, is used to detect crossing of the curve under construction with a
       previously constructed curve
   last_eigenvector : 1-dim, length m, numpy.array of floats, a unit vector that defines the initial direction, relative to
       which the first eigenvector is biased and initial cos_neu_neu is calculated  
   weights : 1-dim, length n numpy.array of observation weights (can also be used to exclude
       individual observations from the computation by setting their weight to zero.),
       where n is the number of feature points 

   Returns a dictionary with keys 'save_xd', 'eigen_vecd', 'cos_neu_neu', 'rho', 'high_rho_points', 'lamb' and 'c0';
   every entry except 'high_rho_points' is sliced to the first count_points iterations.
   '''
   # NOTE(review): `x` is not a parameter in the signature as shown -- a parameter line was probably lost
   x0 = copy(x)
   N = self.Xi.shape[0]
   d = self.Xi.shape[1]
   # algorithm settings are read from the self._lpcParameters dictionary
   it = self._lpcParameters['it']
   h = array(self._lpcParameters['h'])
   t0 = self._lpcParameters['t0']
   rho0 = self._lpcParameters['rho0']
   # per-iteration output buffers, one row/entry per iteration (at most `it`)
   save_xd = empty((it,d))
   eigen_vecd = empty((it,d))
   c0 = ones(it)
   cos_alt_neu = ones(it)
   cos_neu_neu = ones(it)    
   lamb = empty(it) #NOTE this is named 'lambda' in the original R code
   rho = zeros(it)
   high_rho_points = empty((0,d))    
   count_points = 0
   for i in range(it):
     # kernel-weighted local mean and covariance of the data around the current point x0
     kernel_weights = self._kernd(self.Xi, x0, c0[i]*h) * weights
     mu_x = average(self.Xi, axis = 0, weights = kernel_weights)
     sum_weights = sum(kernel_weights)
     mean_sub = self.Xi - mu_x 
     cov_x = dot( dot(transpose(mean_sub), numpy.diag(kernel_weights)), mean_sub) / sum_weights 
     #assert (abs(cov_x.transpose() - cov_x)/abs(cov_x.transpose() + cov_x) < 1e-6).all(), 'Covariance matrix not symmetric, \n cov_x = {0}, mean_sub = {1}'.format(cov_x, mean_sub)
     save_xd[i] = mu_x #save first point of the branch
     count_points += 1
     #calculate path length
     # NOTE(review): an `else:` appears to be missing between the next two assignments
     if i==0:
       lamb[0] = 0
       lamb[i] = lamb[i-1] + sqrt(sum((mu_x - save_xd[i-1])**2))
     #calculate eigenvalues/vectors
     #(sorted_eigen_cov is a list of tuples containing eigenvalue and associated eigenvector, sorted descending by eigenvalue)
     eigen_cov = eigh(cov_x)
     # NOTE(review): calling .sort() on the result of zip() requires zip to return a list (Python 2 behaviour)
     sorted_eigen_cov = zip(eigen_cov[0],map(ravel,vsplit(eigen_cov[1].transpose(),len(eigen_cov[1]))))
     sorted_eigen_cov.sort(key = lambda elt: elt[0], reverse = True)   
     eigen_norm = sqrt(sum(sorted_eigen_cov[0][1]**2))
     eigen_vecd[i] = direction * sorted_eigen_cov[0][1] / eigen_norm  #Unit eigenvector corresponding to largest eigenvalue
     #rho parameters
     rho[i] = sorted_eigen_cov[1][0] / sorted_eigen_cov[0][0] #Ratio of two largest eigenvalues
     # record x0 whenever rho rises above the rho0 threshold between two successive iterations
     if i != 0 and rho[i] > rho0 and rho[i-1] <= rho0:
       high_rho_points = vstack((high_rho_points, x0))
     #angle between successive eigenvectors
     if i==0 and last_eigenvector is not None:
       cos_alt_neu[i] = direction * dot(last_eigenvector, eigen_vecd[i])
     if i > 0:
       cos_alt_neu[i] = dot(eigen_vecd[i], eigen_vecd[i-1])
     #signum flipping
     # NOTE(review): an `else:` appears to be missing before the second cos_neu_neu assignment
     if cos_alt_neu[i] < 0:
       eigen_vecd[i] = -eigen_vecd[i]
       cos_neu_neu[i] = -cos_alt_neu[i]
       cos_neu_neu[i] = cos_alt_neu[i]
     #angle penalization
     pen = self._lpcParameters['pen']
     if pen > 0:
       # blend the new eigenvector with the previous direction, weighted by |cos|**pen
       if i == 0 and last_eigenvector is not None:
         a = abs(cos_alt_neu[i])**pen
         eigen_vecd[i] = a * eigen_vecd[i] + (1-a) * last_eigenvector
       if i > 0:
         a = abs(cos_alt_neu[i])**pen
         eigen_vecd[i] = a * eigen_vecd[i] + (1-a) * eigen_vecd[i-1]
     #check curve termination criteria
     if i not in (0, it-1):
       cross = self._lpcParameters['cross']
       # NOTE(review): an `else:` appears to be missing before the vstack line (it dereferences forward_curve)
       if forward_curve is None:
         full_curve_points = save_xd[0:i+1]
         full_curve_points = vstack((forward_curve['save_xd'],save_xd[0:i+1])) #inefficient, initialize then append? 
       if not cross:
         # indices of curve points within mean(h) of mu_x; the test below checks whether they form one contiguous run
         prox = where(ravel(cdist(full_curve_points,[mu_x])) <= mean(h))[0]
         # NOTE(review): the body of this `if` is missing (presumably a `break`) -- confirm
         if len(prox) != max(prox) - min(prox) + 1:
       convergence_at = self._lpcParameters['convergence_at']
       conv_ratio = abs(lamb[i] - lamb[i-1]) / (2 * (lamb[i] + lamb[i-1]))
       # NOTE(review): the body of this `if` is missing (presumably a `break`) -- confirm
       if conv_ratio  < convergence_at:
       boundary = self._lpcParameters['boundary']
       # NOTE(review): an `else:` appears to be missing between the two c0[i+1] assignments
       if conv_ratio < boundary:
         c0[i+1] = 0.995 * c0[i]
         c0[i+1] = min(1.01*c0[i], 1)
     #step along in direction eigen_vecd[i]
     x0 = mu_x + t0 * eigen_vecd[i]
   #trim output in the case where convergence occurs before 'it' iterations    
   # NOTE(review): the closing brace of this dictionary literal is missing in this copy
   curve = { 'save_xd': save_xd[0:count_points],
             'eigen_vecd': eigen_vecd[0:count_points],
             'cos_neu_neu': cos_neu_neu[0:count_points],
             'rho': rho[0:count_points],
             'high_rho_points': high_rho_points,
             'lamb': lamb[0:count_points],
             'c0': c0[0:count_points]
   return curve