Example #1
def text_apr(data, d1data=None, d2data=None):
    # `fblas` is scipy.linalg.fblas (scipy.linalg.blas on modern SciPy);
    # dcheb() is an external helper returning a Chebyshev basis C and its
    # first- and second-derivative matrices D and DD.
    N = 10
    T = data.shape[-1]
    C, D, DD = dcheb(T, N)
    # K = data @ C.T: trans_a=True transposes the first operand inside BLAS.
    K = fblas.dgemm(alpha=1., a=data.T, b=C.T, trans_a=True)
    apr = fblas.dgemm(alpha=1., a=C.T, b=K, trans_a=False, trans_b=True).T
    d1apr = fblas.dgemm(alpha=1., a=D.T, b=K, trans_a=False, trans_b=True).T
    d2apr = fblas.dgemm(alpha=1., a=DD.T, b=K, trans_a=False, trans_b=True).T
    # Relative squared approximation errors.
    print(((data - apr) ** 2).sum() / (data ** 2).sum())
    if d1data is not None:
        print(((d1data - d1apr) ** 2).sum() / (d1data ** 2).sum())
    if d2data is not None:
        print(((d2data - d2apr) ** 2).sum() / (d2data ** 2).sum())
Example #2
def mygemm(alpha, A, B, dtype=None, **kwargs):
    '''
    A gemm wrapper built on the scipy fblas routines.
    '''
    # Assumes numpy has been imported as np.
    from scipy.linalg.fblas import dgemm, sgemm
    if dtype is None:
        dtype = A.dtype
    if dtype != np.float32 and dtype != np.float64:
        raise TypeError('this function cannot deal with dtype {}.'.format(dtype))
    if not (A.flags['F_CONTIGUOUS'] or A.flags['C_CONTIGUOUS']) \
            or not (B.flags['F_CONTIGUOUS'] or B.flags['C_CONTIGUOUS']):
        raise TypeError('Matrices should be either C or F contiguous.')
    if A.dtype != dtype:
        A = np.asarray(A, dtype=dtype)
    if B.dtype != dtype:
        B = np.asarray(B, dtype=dtype)
    # Pass transposed views to BLAS via the trans flags instead of copying.
    if A.flags['F_CONTIGUOUS']:
        trans_a = 0
    else:
        A = A.T
        trans_a = 1
    if B.flags['F_CONTIGUOUS']:
        trans_b = 0
    else:
        B = B.T
        trans_b = 1
    if dtype == np.float32:
        return sgemm(alpha, A, B, trans_a=trans_a, trans_b=trans_b, **kwargs)
    else:
        return dgemm(alpha, A, B, trans_a=trans_a, trans_b=trans_b, **kwargs)
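A quick usage check for the wrapper above (a sketch: it assumes numpy is imported as np and a SciPy old enough to still ship scipy.linalg.fblas); the result should match np.dot for both memory layouts:

import numpy as np

A = np.random.rand(4, 3)                     # C-contiguous
B = np.asfortranarray(np.random.rand(3, 5))  # F-contiguous

# Both layouts take the no-copy path through the trans_a/trans_b flags.
assert np.allclose(mygemm(1.0, A, B), np.dot(A, B))
assert np.allclose(mygemm(1.0, np.asfortranarray(A), B), np.dot(A, B))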
Example #3
def mean_cov(X):
    # Assumes: from numpy import zeros, float64
    #          from scipy.linalg.fblas import dgemm, dger
    n, p = X.shape
    m = X.mean(axis=0)
    # covariance matrix with correction for rounding error
    # S = (cx'*cx - (scx'*scx/n)) / (n-1)
    # Am Stat 1983, vol 37: 242-247.
    cx = X - m
    # cxT_a / cxT_b feed only the commented-out variant below.
    cxT_a = zeros((p - 1, n), float64)
    cxT_b = zeros((p - 1, n), float64)
    cxT_a[:, :] = (cx.T)[0:p - 1, :]
    cxT_b[:, :] = (cx.T)[1:p, :]
    scx = cx.sum(axis=0)
    # Rank-1 update: scx_op = -(1/n) * outer(scx, scx).
    scx_op = dger(-1.0 / n, scx, scx)
    # S = cx.T @ cx + scx_op, accumulated in place via beta=1, c=scx_op.
    S = dgemm(1.0,
              cx.T,
              cx.T,
              beta=1.0,
              c=scx_op,
              trans_a=0,
              trans_b=1,
              overwrite_c=1)
    #S = dgemm(1.0, cxT_a, cxT_b, beta=1.0,
    #        c=scx_op, trans_a=0, trans_b=1, overwrite_c=1)
    S[:] *= 1.0 / (n - 1)
    return m, S.T
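A sanity check, assuming mean_cov is defined in a module where zeros, float64, dger and dgemm are already in scope: the corrected product should agree with np.cov, which also normalizes by n - 1:

import numpy as np

X = np.random.rand(50, 4)
m, S = mean_cov(X)
assert np.allclose(m, X.mean(axis=0))
assert np.allclose(S, np.cov(X, rowvar=False))  # np.cov defaults to ddof=1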
Example #4
def __setJac2(self):
    '''Calculates :math:`J^T J` and :math:`J^T e` for the training data.
    Used for the Levenberg-Marquardt method.'''
    self.jac2.fill(0)
    self.jacDiff.fill(0)
    for i in range(self.tTD.nBlocks):
        data = self.tTD.getDataBlock(i)
        vals, valOut, hiddenOut = self.__processDataBlock(data)
        diffs = numexpr.evaluate('valOut - vals')
        jac = np.empty((data.shape[0], self.nHid * (self.nIn + 1) + self.nHid + 1))
        d0 = numexpr.evaluate('-vals * (1 - vals)')
        ot = np.outer(d0, self.l2)
        dj = numexpr.evaluate('hiddenOut * (1 - hiddenOut) * ot')
        I = np.tile(np.arange(data.shape[0]), (self.nHid + 1, 1)).flatten('F')
        J = np.arange(data.shape[0] * (self.nHid + 1))
        Q = ss.csc_matrix((dj.flatten(), np.vstack((J, I))),
                          (data.shape[0] * (self.nHid + 1), data.shape[0]))
        jac[:, 0:self.nHid + 1] = ss.spdiags(d0, 0, data.shape[0],
                                             data.shape[0]).dot(hiddenOut)
        Q2 = np.reshape(Q.dot(data),
                        (data.shape[0], (self.nIn + 1) * (self.nHid + 1)))
        jac[:, self.nHid + 1:] = Q2[:, 0:Q2.shape[1] - (self.nIn + 1)]
        if hasfblas:
            # jac2 += jac.T @ jac; trans_b transposes the second operand.
            self.jac2 += fblas.dgemm(1.0, a=jac.T, b=jac.T, trans_b=True)
            self.jacDiff += fblas.dgemv(1.0, a=jac.T, x=diffs)
        else:
            self.jac2 += np.dot(jac.T, jac)
            self.jacDiff += np.dot(jac.T, diffs)
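For context (not part of the source), the accumulated jac2 and jacDiff are exactly the :math:`J^T J` and :math:`J^T e` terms of a Levenberg-Marquardt update; a sketch with an assumed damping parameter lam:

import numpy as np

def lm_step(jac2, jacDiff, lam):
    # Solve the damped normal equations (J^T J + lam*I) delta = J^T e.
    return np.linalg.solve(jac2 + lam * np.eye(jac2.shape[0]), jacDiff)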
Example #7
def calc_argmax(data):
    # Assumes numpy as np; `fblas` is scipy.linalg.fblas; dcheb() is an
    # external helper returning a Chebyshev basis C and derivative matrices
    # D and DD.
    T = data.shape[-1]
    N = T // 5
    C, D, DD = dcheb(T, N)
    K = fblas.dgemm(alpha=1., b=data.T, a=C.T).T
    apr = fblas.dgemm(alpha=1., a=C.T, b=K.T, trans_a=True).T
    argmaxs = apr.argmax(axis=-1)
#    Ds = D[argmaxs]   # makes copies
#    DDs = DD[argmaxs]
#    pers = inner1d(K, Ds) / inner1d(K, DDs)
    pers = np.empty_like(argmaxs, dtype=float)
    # One Newton step on the approximant: shift each argmax by f'/f''.
    for i, ax in enumerate(argmaxs):   # does not make copies. slower.
        pers[i] = fblas.ddot(K[i], D[ax]) / fblas.ddot(K[i], DD[ax])
    pers[pers > 1] = 0
    return argmaxs - pers
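The loop above refines each integer argmax with one Newton step on the Chebyshev approximant. A standalone sketch of the ddot calls it relies on, using the modern scipy.linalg.blas names:

import numpy as np
from scipy.linalg import blas

x = np.random.rand(8)
y = np.random.rand(8)
# ddot is the double-precision dot product; it matches np.dot on 1-D input.
assert np.isclose(blas.ddot(x, y), np.dot(x, y))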
Example #8
def linear_least_squares(a, b, residuals=False):
    """
    Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that
    minimizes the Euclidean 2-norm `|| b - a x ||^2`.  The equation may
    be under-, well-, or over- determined (i.e., the number of
    linearly independent rows of `a` can be less than, equal to, or
    greater than its number of linearly independent columns).  If `a`
    is square and of full rank, then `x` (but for round-off error) is
    the "exact" solution of the equation.

    Parameters
    ----------
    a : (M, N) array_like
        "Coefficient" matrix.
    b : (M,) array_like
        Ordinate or "dependent variable" values.
    residuals : bool
        Compute the residuals associated with the least-squares solution

    Returns
    -------
    x : (N,) ndarray
        Least-squares solution. The shape of `x` depends on the shape of
        `b`.
    residuals : float (optional)
        Euclidean 2-norm of the residual ``b - a @ x``; returned only when
        `residuals` is True.
    """
    if not isinstance(a, np.ndarray) or not a.flags['C_CONTIGUOUS']:
        warn('Matrix a is not a C-contiguous numpy array. The solver will '
             'create a copy, which will result in increased memory usage.')

    a = np.asarray(a, order='C')
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()

    if residuals:
        return x, np.linalg.norm(np.dot(a, x) - b)
    else:
        return x
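A usage sketch comparing against NumPy's SVD-based solver (it assumes dgemm and warn are already imported where linear_least_squares is defined, and that `a` has full column rank). Note that this normal-equations approach squares the condition number of `a`, so np.linalg.lstsq is the more robust choice for ill-conditioned problems:

import numpy as np

a = np.random.rand(20, 3)   # overdetermined, full column rank
b = np.random.rand(20)

x = linear_least_squares(a, b)
x_ref = np.linalg.lstsq(a, b, rcond=None)[0]
assert np.allclose(x, x_ref)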
Example #10
def verify_ga_gemm(ta, tb, num_m, num_n, num_k, alpha, g_a, g_b, beta, g_c):
    tmpa = np.ndarray((num_m, num_k), dtype=np.float64)
    tmpb = np.ndarray((num_k, num_n), dtype=np.float64)
    tmpc = np.ndarray((num_m, num_n), dtype=np.float64)
    tmpa = ga.get(g_a, buffer=tmpa)
    tmpb = ga.get(g_b, buffer=tmpb)
    tmpc = ga.get(g_c, buffer=tmpc)
    # All four (ta, tb) combinations map directly onto dgemm's trans flags,
    # so a single call covers every case.
    result = dgemm(alpha, tmpa, tmpb, beta=beta, trans_a=ta, trans_b=tb)
    abs_value = np.abs(tmpc-result)
    if np.any(abs_value > 1):
        ga.error('verify ga.gemm failed')
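The collapsed call works because the trans_a/trans_b flags accept the booleans directly (Python bools are ints). A quick check of the flag semantics, sketched with the modern scipy.linalg.blas module:

import numpy as np
from scipy.linalg.blas import dgemm

a = np.random.rand(3, 4)
b = np.random.rand(3, 5)
# trans_a=1 multiplies by a's transpose: result = a.T @ b, shape (4, 5).
assert np.allclose(dgemm(1.0, a, b, trans_a=1), a.T @ b)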
Example #12
def gemm(alpha, A, B, dtype=None, **kwargs):
    '''A gemm function that uses scipy fblas functions, avoiding a matrix
    copy when an input is transposed.

    The returned matrix is designed to be C_CONTIGUOUS.
    '''
    from scipy.linalg.fblas import dgemm, sgemm
    if A.ndim != 2 or B.ndim != 2:
        raise TypeError('gemm only deals with 2-D matrices.')
    if dtype is None:
        dtype = A.dtype
    if dtype != np.float32 and dtype != np.float64:
        raise TypeError('this function cannot deal with dtype {}.'.format(dtype))
    if not (A.flags['F_CONTIGUOUS'] or A.flags['C_CONTIGUOUS']) \
            or not (B.flags['F_CONTIGUOUS'] or B.flags['C_CONTIGUOUS']):
        raise TypeError('Matrices should be either C or F contiguous.')
    if A.dtype != dtype:
        A = np.asarray(A, dtype=dtype)
    if B.dtype != dtype:
        B = np.asarray(B, dtype=dtype)

    # In fact, what we are doing here is (1) compute B*A, and (2) transpose
    # the result. The reason is that fblas returns F_CONTIGUOUS matrices, so
    # doing this enables us to get a final output that is C_CONTIGUOUS.
    if not B.flags['F_CONTIGUOUS']:
        B = B.T
        trans_b = 0
    else:
        trans_b = 1
    if not A.flags['F_CONTIGUOUS']:
        A = A.T
        trans_a = 0
    else:
        trans_a = 1
    if dtype == np.float32:
        return sgemm(alpha, B, A, trans_a=trans_b, trans_b=trans_a, **kwargs).T
    else:
        return dgemm(alpha, B, A, trans_a=trans_b, trans_b=trans_a, **kwargs).T
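A quick check of the contiguity claim in the docstring (a sketch, assuming numpy as np and an fblas-era SciPy): fblas returns F-ordered output, and the final .T flips it to C order.

import numpy as np

A = np.random.rand(4, 3)
B = np.random.rand(3, 5)

out = gemm(1.0, A, B)
assert np.allclose(out, np.dot(A, B))
assert out.flags['C_CONTIGUOUS']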
Example #13
def admmDP(C, a=1.0, props=None, warmX=None, warmU=None):
    # `props` is mutated below, so avoid a mutable default argument.
    if props is None:
        props = {}
    n = C.shape[0]
    logger = logging.getLogger("search.admm.dp")
    beta = props.get('priorWeight', 0.01)
    maxIter = props.get("maxParamaterOptIters", 1000)
    epsilon = props.get("epsilon", 1e-6)
    returnIntermediate = props.get("returnIntermediate", False)
    regDiagonal = props.get("regDiagonal", False)
    degreePriorADMM = props.get("degreePrior", True)
    adaptiveMu = props.get("adaptiveMu", True)
    props['dualDecompEpsilon'] = props.get('dualDecompEpsilon', 1e-10)
    sTime = time.time()

    logger.info("Starting ADMM learning. n=%d beta=%1.4f", n, beta)

    mu = props.get("mu", 0.1) #10.0
    muChange = props.get("muChange", 0.1)
    
    if props.get("normalizeForADMM", True):
        logger.info("-- NORMALIZING C --")
        covNormalizer = sqrt(diagonal(C))
        C = C / outer(covNormalizer, covNormalizer)
    else:
        logger.info("NOT NORMALIZING C")
    
    # Rescale to make beta's range a better fit
    maxOffDiag = numpy.max(numpy.abs(tril(C, -1)))
    C = array(C / maxOffDiag)
    
    if warmX is not None:
        X = warmX
    else:
        X = eye(n)
        
    if warmU is not None:
        U = warmU
    else:
        U = eye(n)
    
    
    Z = copy(X)
    
    ll = inf
    Xs = []
    gs = []
    ds = []

    if a is None:
        a = 1.0

    if degreePriorADMM:
        # The degree prior needs `a` to be an array of per-degree weights
        # (length at least n + 1); the scalar default only makes sense when
        # degreePrior is disabled.
        adelta = -a[:n] + a[1:]
        adeltaMat = outer(ones(n), adelta)
        beta = beta / 2

        logger.debug("adelta: %s", adelta[:6])

    warmV = zeros((n, n))

    for i in range(maxIter):
            
        #####################################################################
        ##### Eigenvalue update to X
        logger.debug("Performing eigenvalue decomposition")
        
        for retry in range(6):
            try:
                A = mu*(Z - U) - C
                (lamb, Q) = linalg.eigh(A)
                logger.debug("Decomposition finished")
                break
            except numpy.linalg.linalg.LinAlgError:
                # If the decomposition fails numerically, reduce the step
                # size mu and rescale the scaled dual variable U to match.
                logger.error("Failed eigendecomposition with mu=%2.2e", mu)
                mu *= 0.5
                U /= 0.5
                logger.error("Retry %d, halving mu to: %2.5f", retry, mu)
        else:
            raise RuntimeError("eigendecomposition failed after 6 retries")
        
        newEigs = (lamb + sqrt(lamb*lamb + 4*mu)) / (2*mu)
        X = FB.dgemm(alpha=1.0, a=(Q*newEigs), b=Q, trans_b=True)
        
        #### Soft thresholding update Z
        logger.debug("Starting Proximal step")
        Zpreshrink = X + U
        Zlast = copy(Z)
        if degreePriorADMM:
            Z = proxSubDualDecomp(Zpreshrink, beta/mu, adelta, adeltaMat, props, warmV)
        else:
            Z = shrink(Zpreshrink, beta*a/mu, regDiagonal)
        
        if props.get("nonpositivePrecision", False):
            Z = Z * (Z < 0) + diag(diag(Z))
        
        ### Update U ( U is the sum of residuals so far )
        logger.debug("Updating U")
        U += X - Z
        #####################################################################
        
        dualResidual = linalg.norm(Z - Zlast)
        residual = linalg.norm(X-Z)
           
        if adaptiveMu:
            # if the two residuals differ by more than this factor, adjust mu (p20)
            differenceMargin = 10
            if residual > dualResidual*differenceMargin:
                mu *= 1.0 + muChange
                U /= 1.0 + muChange
                logger.debug("*** Increasing mu to %2.6f", mu)
            elif dualResidual > residual*differenceMargin:
                mu *= 1.0 - muChange
                U /= 1.0 - muChange
                logger.debug("*** Decreasing mu to %2.6f", mu)
        
        # Ensure that the dual decomp procedure is run with enough accuracy
        ddeps = props['dualDecompEpsilon']
        margin = 50.0
        if residual < margin*ddeps or dualResidual < margin*ddeps:
            props['dualDecompEpsilon'] = min(residual, dualResidual)/margin
            
        if returnIntermediate:
            ds.append(dualResidual)
            gs.append(residual)
        
        if residual < epsilon and dualResidual < epsilon:
            logger.info("Converged to %2.3e in %i iters", residual, i+1)
            break
        
        edges = (count_nonzero(Z) - n) / 2
        logger.info("Iter %d, res: %2.2e, dual res: %2.2e, mu=%1.1e, %d edges free", 
                    i+1, residual, dualResidual, mu, edges)
    
    eTime = time.time()
    timeTaken = eTime-sTime
    logger.info("Time taken(s): %5.7f", timeTaken)
    
    if residual > epsilon or dualResidual > epsilon:
        logger.error("NONCONVERGENCE!!, res: %2.2e, dres: %2.2e, iters: %d", 
                    residual, dualResidual, i)
    
    edges = (count_nonzero(Z) - n) / 2
    logger.info("regDiagonal: %s, beta: %2.4f", regDiagonal, beta)
    logger.info("Edges %d out of %d   | eps=%1.1e", edges, (n*n - n)/2, epsilon)
    logger.info("Final residual=%2.2e, dual res=%2.2e", residual, dualResidual)
            
    return {'X': Z, 'U': U, 'obj': ll, 'iteration': i+1, 'Xs': Xs, 'gs': gs, 'ds': ds,
            'timeTaken': timeTaken, 'edges': edges, 'Zpreshrink': Zpreshrink, 'bm': beta/mu}
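The eigenvalue update above is the closed-form X-minimization used in ADMM solvers for Gaussian graphical models: X = Q diag(lamb') Q^T with lamb'_i = (lamb_i + sqrt(lamb_i^2 + 4*mu)) / (2*mu). A standalone sketch checking that the dgemm call reproduces it, with scipy.linalg.blas standing in for FB:

import numpy as np
from scipy.linalg import blas as FB

n, mu = 5, 0.5
A = np.random.rand(n, n)
A = (A + A.T) / 2                      # symmetrize
lamb, Q = np.linalg.eigh(A)
newEigs = (lamb + np.sqrt(lamb * lamb + 4 * mu)) / (2 * mu)

# Q * newEigs scales column j of Q by newEigs[j], i.e. Q @ diag(newEigs).
X = FB.dgemm(alpha=1.0, a=(Q * newEigs), b=Q, trans_b=True)
assert np.allclose(X, Q @ np.diag(newEigs) @ Q.T)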