Example #1
def _elbo_t(logp, uw, inarray, n_mcsamples, random_seed):
    """Create Theano tensor of approximate ELBO by Monte Carlo sampling.
    """
    l = (uw.size / 2).astype('int64')
    u = uw[:l]
    w = uw[l:]

    # Callable tensor
    logp_ = lambda input: theano.clone(logp, {inarray: input}, strict=False)

    # Naive Monte-Carlo
    r = MRG_RandomStreams(seed=random_seed)

    if n_mcsamples == 1:
        n = r.normal(size=inarray.tag.test_value.shape)
        q = n * exp(w) + u
        elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi))
    else:
        n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0]))
        qs = n * exp(w) + u
        logps, _ = theano.scan(fn=lambda q: logp_(q),
                               outputs_info=None,
                               sequences=[qs])
        elbo = tt.mean(logps) + tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi))

    return elbo
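For reference, a minimal NumPy-only sketch of the same mean-field estimate (the `logp` callable and the value check below are illustrative assumptions, not part of the PyMC3 code above); it shows where the entropy term tt.sum(w) + 0.5 * l * (1 + np.log(2.0 * np.pi)) comes from:

import numpy as np

def elbo_mc(logp, u, w, n_mcsamples=100, seed=0):
    # ELBO for q(z) = N(u, diag(exp(w))^2): E_q[log p(z)] plus the Gaussian
    # entropy sum(w) + 0.5 * l * (1 + log(2*pi)), estimated by Monte Carlo.
    rng = np.random.RandomState(seed)
    l = u.size
    z = rng.normal(size=(n_mcsamples, l)) * np.exp(w) + u
    entropy = np.sum(w) + 0.5 * l * (1 + np.log(2 * np.pi))
    return np.mean([logp(zi) for zi in z]) + entropy

# For a standard-normal "model" the ELBO at u=0, w=0 is ~0 (log of evidence 1).
std_normal_logp = lambda z: -0.5 * np.sum(z**2) - 0.5 * z.size * np.log(2 * np.pi)
print(elbo_mc(std_normal_logp, u=np.zeros(2), w=np.zeros(2)))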
Example #2
def show_overlay(img3d, cc3d, ncc=10, s=85, xyz='xy', alpha=.8):
    """Shows the connected components overlaid over img3d

    Input
    ======
    img3d -- 3d array
    cc3d -- 3d array ( preferably of same shape as img3d, use get_3d_cc(...) )
    ncc -- where to cut off the color scale
    s -- slice to show
    xyz -- which projection to use in {'xy','xz','yz'}
    """
    cc = get_slice(cc3d,s,xyz)
    img = get_slice(img3d,s,xyz)

    notcc = np.isnan(cc)
    incc = ~notcc

    img4 = plt.cm.gray(img/np.nanmax(img))
    if not np.isinf(ncc):
        cc = plt.cm.jet(cc/float(ncc))
    else:
        cc = plt.cm.jet(np.log(cc)/np.log(np.nanmax(cc)))

    cc[notcc,:]=img4[notcc,:]
    cc[incc,3] = 1-img[incc]/(2*np.nanmax(img))

    plt.imshow(cc)
Example #3
def chol_logdet(U):
    if isinstance(U, np.ndarray):
        return 2*np.sum(np.log(np.diag(U)))
    elif isinstance(U, cholmod.Factor):
        return np.sum(np.log(U.D()))
    else:
        raise ValueError("Unknown type of Cholesky factor")
def joint_logdist(pi, alpha, sigma, tau, u):
    abs_pi = len(pi)
    n = np.sum(pi)
    tmp = abs_pi * log(alpha) + (n - 1.) * log(u) - gammaln(n) - (n - sigma * abs_pi) * log(u + tau) \
          - (alpha / sigma) * ((u + tau) ** sigma - tau ** sigma)
    tmp += np.sum(gammaln(pi - sigma) - gammaln(1. - sigma))
    return tmp
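A quick sanity check for chol_logdet on a dense SPD matrix (the cholmod.Factor branch needs scikit-sparse and is not exercised here):

import numpy as np

A = np.array([[4.0, 1.0], [1.0, 3.0]])          # symmetric positive definite
U = np.linalg.cholesky(A).T                     # upper factor, A = U.T @ U
assert np.isclose(chol_logdet(U), np.linalg.slogdet(A)[1])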
Example #5
def _lmvnpdffull(obs, means, covars):
    """
    Log probability for full covariance matrices.

    WARNING: In certain cases, this function will modify in-place
    some of the covariance matrices

    """
    from scipy import linalg
    import itertools
    if hasattr(linalg, 'solve_triangular'):
        # only in scipy since 0.9
        solve_triangular = linalg.solve_triangular
    else:
        # slower, but works
        solve_triangular = linalg.solve
    n_obs, n_dim = obs.shape
    nmix = len(means)
    log_prob = np.empty((n_obs, nmix))
    for c, (mu, cv) in enumerate(itertools.izip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            # The model is most probably stuck in a component with too
            # few observations; we need to reinitialize this component
            cv[:] = 10 * np.eye(cv.shape[0])
            cv_chol = cv
        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = solve_triangular(cv_chol, (obs - mu).T, lower=True).T
        log_prob[:, c] = -.5 * (np.sum(cv_sol ** 2, axis=1) + \
                           n_dim * np.log(2 * np.pi) + cv_log_det)

    return log_prob
def ll(actual, predicted):
    """
    Computes the log likelihood.

    This function computes the log likelihood between two numbers,
    or element-wise between a pair of lists or numpy arrays.

    Parameters
    ----------
    actual : int, float, list of numbers, numpy array
             The ground truth value
    predicted : same type as actual
                The predicted value

    Returns
    -------
    score : double or list of doubles
            The log likelihood error between actual and predicted

    """
    actual = np.array(actual)
    predicted = np.array(predicted)
    err = np.seterr(all='ignore')
    score = -(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))
    np.seterr(divide=err['divide'], over=err['over'],
              under=err['under'], invalid=err['invalid'])
    if isinstance(score, np.ndarray):
        score[np.isnan(score)] = 0
    else:
        if np.isnan(score):
            score = 0
    return score
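A short usage sketch for ll (note that the returned "log likelihood error" is the element-wise negative log likelihood, i.e. log loss):

print(ll(1, 0.9))                        # ~0.105
print(ll([1, 0, 1], [0.9, 0.1, 0.5]))    # ~[0.105, 0.105, 0.693]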
Example #7
def log_diff_exp(x, axis=0):
    """ Calculates the logarithm of the diffs of e to the power of input 'x'. The method tries to avoid
        overflows by using the relationship: log(diff(exp(x))) = alpha + log(diff(exp(x-alpha))).
        
    :Parameter:
        x:    data.
             -type: float or numpy array

        axis: Diffs along the given axis.
             -type: int

    :Return:
        Logarithm of the diffs of exp of x.
       -type: float or numpy array.
        
    """
    alpha = x.max(axis) - numx.log(numx.finfo(numx.float64).max)/2.0
    if axis == 1:
        return numx.squeeze(alpha + numx.log(
                                             numx.diff(
                                                       numx.exp(x.T - alpha)
                                                       , n=1, axis=0)))
    else:
        return numx.squeeze(alpha + numx.log(
                                             numx.diff(
                                                       numx.exp(x - alpha)
                                                       , n=1, axis=0)))
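A quick check of log_diff_exp against the naive computation, assuming numx is the module's alias for numpy:

import numpy as numx

x = numx.array([1.0, 2.5, 4.0])
naive = numx.log(numx.diff(numx.exp(x)))     # safe here, but overflows for large x
assert numx.allclose(log_diff_exp(x), naive)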
Example #8
def hqic(self):
    nobs = self.nobs
    # Lutkepohl
    # return np.log(self.sigma2)+ 2 * np.log(np.log(nobs))/nobs * self.k_ar
    # R uses all estimated parameters rather than just lags
    return (np.log(self.sigma2) + 2 * np.log(np.log(nobs))/nobs *
            (1 + self.df_model))
def computeCost(theta, X, y):
    theta.shape = (1, 3)
    m = y.size
    z = X.dot(theta.T)
    h = 1.0 / (1.0 + np.exp(-z))   # sigmoid hypothesis
    J = (1.0 / m) * ((-y.T.dot(np.log(h))) - ((1.0 - y.T).dot(np.log(1.0 - h))))
    return J.sum()
def sigma2(powerSpec, V, kmin, kmax, points=500,\
           window='tophat_x'):
    P = interp1d(powerSpec[:,0], powerSpec[:,1], kind='linear',
                 bounds_error=False, fill_value=0.)
    # P is a function

    # the following are arrays
    lnk = np.linspace(np.log(kmin), np.log(kmax), points, \
            endpoint=False) + (np.log(kmax)-np.log(kmin))/points/2
    k = np.exp(lnk)
    
    if isinstance(window, str):
        if window=='gaus':
            W = gaussian(k, V)
        elif window=='tophat_x':
            W = tophat_xspace(k, V)
        elif window=='tophat_k':
            W = tophat_kspace(k, V)
        elif window=='triangle_k':
            W = triangle_kspace(k, V)
        elif window=='tukey_k':
            W = tukey_kspace(k, V)
        else:
            raise ValueError("unknown window: {}".format(window))
    elif isinstance(window, tuple):
        if window[0]=='tukey_k':
            W = tukey_kspace(k, V, window[1])
        else:
            raise ValueError("unknown window: {}".format(window[0]))
    else:
        raise ValueError("window must be a string or a tuple")

    integrand = k**3 * P(k) / (2.*np.pi**2) * np.abs(W)**2

    return np.sum(integrand) * (lnk[1]-lnk[0])
def compute_cost( X, y, theta, lam ):

    '''Compute cost for logistic regression.'''
    
    # Number of training examples
    m = y.shape[0]

    # Compute the prediction based on theta and X
    predictions = X.dot( theta )

    # Preprocessing values before sending to the sigmoid function.
    # If the argument to the sigmoid is >= 20, the sigmoid is effectively 1;
    # similarly, arguments <= -500 give a sigmoid of effectively 0.
    predictions[ where( predictions >= 20 ) ] = 20
    predictions[ where( predictions <= -500 ) ] = -500
    hypothesis = sigmoid( predictions )

    hypothesis[ where( hypothesis == 1.0 ) ] = 0.99999

    # Part of the cost function without regularization
    J1 = ( -1.0 / m ) * sum( ( y * np.log( hypothesis ) ) + 
                            ( ( 1.0 - y ) * np.log( 1.0 - hypothesis ) ) ) 

    # Computing the regularization term
    J2 = lam / ( 2.0 * m ) * sum( theta[ 1:, ] * theta[ 1:, ] )
    error = hypothesis - y

    return J1 + J2
Example #12
 def _fgreen3d(self, z, y, x):
     ''' Return the periodic integrated Green's function on the 'original'
     domain
     Qiang, Lidia, Ryne, Limborg-Deprey, PRSTAB 10, 129901 (2007)
     Args:
         x,y,z: arrays, e.g. x, y, z = np.meshgrid(xx, yy, zz)
     '''
     abs_r = np.sqrt(x * x + y * y + z * z)
     inv_abs_r = 1./abs_r
     tmpfgreen =  (-(  +    z*z * np.arctan(x*y*inv_abs_r/z)
                   +   y*y * np.arctan(x*z*inv_abs_r/y)
                   +   x*x * np.arctan(y*z*inv_abs_r/x)
                )/2.
                 + y*z*np.log(x+abs_r)
                 + x*z*np.log(y+abs_r)
                 + x*y*np.log(z+abs_r))
     fgreen = np.zeros((2 * self.mesh.nz,
                        2 * self.mesh.ny,
                        2 * self.mesh.nx), dtype=np.complex128)
     # evaluate the indefinite integral per cell (int_a^b f = F(b) - F(a))
     fgreen[:self.mesh.nz, :self.mesh.ny, :self.mesh.nx] = (
              tmpfgreen[ 1:,  1:,  1:]
             -tmpfgreen[:-1,  1:,  1:]
             -tmpfgreen[ 1:, :-1,  1:]
             +tmpfgreen[:-1, :-1,  1:]
             -tmpfgreen[ 1:,  1:, :-1]
             +tmpfgreen[:-1,  1:, :-1]
             +tmpfgreen[ 1:, :-1, :-1]
             -tmpfgreen[:-1, :-1, :-1]
             ) * 1./self.mesh.volume_elem # divide by vol_elem to average!
     return fgreen
Example #13
    def __init__(self, shape, successes, 
                 trials=None, coef=1., offset=None,
                 quadratic=None,
                 initial=None):

        smooth_atom.__init__(self,
                             shape,
                             offset=offset,
                             quadratic=quadratic,
                             initial=initial,
                             coef=coef)

        if sparse.issparse(successes):
            #Convert sparse success vector to an array
            self.successes = successes.toarray().flatten()
        else:
            self.successes = np.asarray(successes)

        if trials is None:
            if not set([0,1]).issuperset(np.unique(self.successes)):
                raise ValueError("Number of successes is not binary - must specify number of trials")
            self.trials = np.ones(self.successes.shape, np.float)
        else:
            if np.min(trials-self.successes) < 0:
                raise ValueError("Number of successes greater than number of trials")
            if np.min(self.successes) < 0:
                raise ValueError("Response coded as negative number - should be non-negative number of successes")
            self.trials = trials * 1.

        saturated = self.successes / self.trials
        deviance_terms = np.log(saturated) * self.successes + np.log(1-saturated) * (self.trials - self.successes)
        deviance_constant = -2 * coef * deviance_terms[~np.isnan(deviance_terms)].sum()

        devq = identity_quadratic(0,0,0,-deviance_constant)
        self.quadratic += devq
Example #14
def convergence_rates(m, solver_function, num_periods=8):
    """
    Returns m-1 empirical estimates of the convergence rate,
    computed from m simulations in which the time step is
    halved for each successive run.
    solver_function(U, omega, tau, T) solves each problem,
    where T is obtained from simulations spanning
    num_periods periods.
    """
    from math import pi
    omega = 0.35; U = 0.3       # just some chosen values
    P = 2*pi/omega              # period
    tau = P/30                  # 30 time steps per period 2*pi/omega
    T = P*num_periods

    tau_values = []
    E_values = []
    for i in range(m):
        u, t = solver_function(U, omega, tau, T, 1)
        u_e = u_exact(t, U, omega)
        E = np.sqrt(tau*np.sum((u_e-u)**2))
        tau_values.append(tau)
        E_values.append(E)
        tau = tau/2

    r = [np.log(E_values[i-1]/E_values[i])/
         np.log(tau_values[i-1]/tau_values[i])
         for i in range(1, m, 1)]
    return r
def klBern(x, y):
    r""" Kullback-Leibler divergence for Bernoulli distributions. https://en.wikipedia.org/wiki/Bernoulli_distribution#Kullback.E2.80.93Leibler_divergence

    .. math:: \mathrm{KL}(\mathcal{B}(x), \mathcal{B}(y)) = x \log(\frac{x}{y}) + (1-x) \log(\frac{1-x}{1-y})."""
    x = min(max(x, eps), 1 - eps)
    y = min(max(y, eps), 1 - eps)
    return x * np.log(x / y) + (1 - x) * np.log((1 - x) / (1 - y))
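A usage sketch for klBern; eps is a module-level clipping constant not shown in the snippet, assumed here to be a tiny value such as 1e-15:

import numpy as np

eps = 1e-15                  # assumed clipping constant
print(klBern(0.5, 0.5))      # 0.0
print(klBern(0.9, 0.1))      # ~1.758
print(klBern(0.0, 0.5))      # finite thanks to clipping, ~0.693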
Example #16
def llr(sf_data, sf_map, coreference_map, sums, i1, i2):
    #i1 = sf_index(sf_data, sf1)
    #i2 = sf_index(sf_data, sf2)
    # compute actual cell frequencies
    # - outer cells
    ndd = float(sf_data['next-mention-index'])
    npd = float(sums[0,i1])
    ndp = float(sums[0,i2])
    nnd = ndd - npd 
    ndn = ndd - ndp
    # - inner cells
    mentions = [i for i in get_mentions(sf_data, sf_map, coreference_map, i1) if i in get_mentions(sf_data, sf_map, coreference_map, i2)]
    npp = float(len(mentions))
    npn = npd - npp
    nnp = ndp - npp
    nnn = nnd - nnp
    # compute (randomly) predicted cell frequencies
    enn = nnd * ndn / ndd
    enp = nnd * ndp / ndd
    epn = npd * ndn / ndd
    epp = npd * ndp / ndd
    #print npd, ndp, npp, ndd
    # compute log-likelihood ratio
    result = 0.0
    if nnn > 0: result += nnn * np.log(nnn / enn)
    if nnp > 0: result += nnp * np.log(nnp / enp)
    if npn > 0: result += npn * np.log(npn / epn)
    if npp > 0: result += npp * np.log(npp / epp)
    return 2.0 * result
Example #17
def compute_edge_weights( edge_ids, edge_probabilities, beta ):
    """
    Convert edge probabilities to energies for the multicut problem.
    
    edge_ids:
        The list of edges in the graph. shape=(N, 2)
    edge_probabilities:
        1-D, float (1.0 means edge is CUT, disconnecting the two SPs)
    beta:
        scalar (float)

    Special behavior:
        If any node has ID 0, all of its edges will be given an
        artificially low energy, to prevent it from merging with its
        neighbors, regardless of what the edge_probabilities say.
    """
    p1 = edge_probabilities # P(Edge=CUT)
    p1 = np.clip(p1, 0.001, 0.999)
    p0 = 1.0 - p1 # P(Edge=NOT CUT)

    edge_weights = np.log(p0/p1) + np.log( (1-beta)/(beta) )

    # See note special behavior, above
    edges_touching_zero = edge_ids[:,0] == 0
    if edges_touching_zero.any():
        logger.warn("Volume contains label 0, which will be excluded from the segmentation.")
        MINIMUM_ENERGY = -1000.0
        edge_weights[edges_touching_zero] = MINIMUM_ENERGY
    
    return edge_weights
def test_skewed_chi2_sampler():
    """test that RBFSampler approximates kernel on random data"""

    # compute exact kernel
    c = 0.03
    # abbreviations for an easier formula
    X_c = (X + c)[:, np.newaxis, :]
    Y_c = (Y + c)[np.newaxis, :, :]

    # we do it in log-space in the hope that it's more stable
    # this array is n_samples_x x n_samples_y x n_features
    log_kernel = ((np.log(X_c) / 2.) + (np.log(Y_c) / 2.) + np.log(2.) -
                  np.log(X_c + Y_c))
    # reduce to n_samples_x x n_samples_y by summing over features in log-space
    kernel = np.exp(log_kernel.sum(axis=2))

    # approximate kernel mapping
    transform = SkewedChi2Sampler(skewedness=c, n_components=1000,
                                  random_state=42)
    X_trans = transform.fit_transform(X)
    Y_trans = transform.transform(Y)

    kernel_approx = np.dot(X_trans, Y_trans.T)
    assert_array_almost_equal(kernel, kernel_approx, 1)

    # test error is raised on negative input
    Y_neg = Y.copy()
    Y_neg[0, 0] = -1
    assert_raises(ValueError, transform.transform, Y_neg)
Example #19
def B(x,XW,n1,n2,kernel,logproductExpectations=None):
    """Computes B(x)=\int\Sigma_{0}(x,w,XW[0:n1],XW[n1:n1+n2])dp(w).
      
       Args:
          x: Vector of points where B is evaluated
          XW: Point (x,w)
          n1: Dimension of x
          n2: Dimension of w
          kernel
          logproductExpectations: Vector with the logarithm
                                  of the product of the
                                  expectations of
                                  np.exp(-alpha2[j]*((z-W[i,j])**2))
                                  where W[i,:] is a point in the history.
          
    """
    x=np.array(x).reshape((x.shape[0],n1))
    results=np.zeros(x.shape[0])
    #parameterLamb=parameterSetsPoisson
    X=XW[0:n1]
    inda=n1+n2
    W=XW[n1:inda]
    alpha2=0.5*((kernel.alpha[n1:n1+n2])**2)/scaleAlpha[n1:n1+n2]**2
    alpha1=0.5*((kernel.alpha[0:n1])**2)/scaleAlpha[0:n1]**2
    variance0=kernel.variance
    
    if logproductExpectations is None:
        logproductExpectations=0.0
        for j in xrange(n2):
            temp = expectation(W[j], alpha2[j])
            logproductExpectations += np.log(temp)
    for i in xrange(x.shape[0]):
        results[i]=logproductExpectations+np.log(variance0)-np.sum(alpha1*((x[i,:]-X)**2))
    return np.exp(results)
Example #20
def normalize_input(params):
    if pc_id == 0:
        print 'normalize_input'
        dt = params['dt_rate'] # [ms] time step for the non-homogenous Poisson process 
        L_input = np.zeros((params['n_exc'], params['t_stimulus']/dt))

        v_max = params['v_max']
        if params['log_scale']==1:
            v_rho = np.linspace(v_max/params['N_V'], v_max, num=params['N_V'], endpoint=True)
        else:
            v_rho = np.logspace(np.log(v_max/params['N_V'])/np.log(params['log_scale']),
                            np.log(v_max)/np.log(params['log_scale']), num=params['N_V'],
                            endpoint=True, base=params['log_scale'])
        v_theta = np.linspace(0, 2*np.pi, params['N_theta'], endpoint=False)
        index = 0
        for i_RF in xrange(params['N_RF_X']*params['N_RF_Y']):
            index_start = index
            for i_v_rho, rho in enumerate(v_rho):
                for i_theta, theta in enumerate(v_theta):
                    fn = params['input_rate_fn_base'] + str(index) + '.dat'
                    L_input[index, :] = np.loadtxt(fn)
                    print 'debug', fn
                    index += 1
            index_stop = index
            print 'before', i_RF, L_input[index_start:index_stop, :].sum()
            if (L_input[index_start:index_stop, :].sum() > 1):
                L_input[index_start:index_stop, :] /= L_input[index_start:index_stop, :].sum()
            print 'after', i_RF, L_input[index_start:index_stop, :].sum()

        for i in xrange(params['n_exc']):
            output_fn = params['input_rate_fn_base'] + str(i) + '.dat'
            print 'output_fn:', output_fn
            np.savetxt(output_fn, L_input[i, :])
    if comm is not None:
        comm.barrier()
Example #21
File: gmm.py Project: kslin/CS181
def gm_assign_to_cluster(X, center_list, cov_list, p_k):
    """Assigns each sample to one of the Gaussian clusters given.
    
    Returns an array with numbers, 0 corresponding to the first cluster in the
    cluster list.
    """
    # Reused code from E-step, should be unified somehow:
    samples = X.shape[0]
    K = len(center_list)
    log_p_Xn_mat = np.zeros((samples, K))
    for k in range(K):
        log_p_Xn_mat[:, k] = logmulnormpdf(X, center_list[k], cov_list[k]) + np.log(p_k[k])
    pmax = np.max(log_p_Xn_mat, axis=1)
    log_p_Xn = pmax + np.log(np.sum(np.exp(log_p_Xn_mat.T - pmax), axis=0).T)
    logL = np.sum(log_p_Xn)

    log_p_nk = np.zeros((samples, K))
    for k in range(K):
        # log_p_nk[:,k] = logmulnormpdf(X, center_list[k], cov_list[k]) + np.log(p_k[k]) - log_p_Xn
        log_p_nk[:, k] = log_p_Xn_mat[:, k] - log_p_Xn

    print log_p_nk
    # Assign to cluster:
    maxP_k = np.c_[np.max(log_p_nk, axis=1)] == log_p_nk
    # print np.max(log_p_nk, axis=1)
    maxP_k = maxP_k * (np.array(range(K)) + 1)
    return np.sum(maxP_k, axis=1) - 1
Example #22
 def all_GL(self, q, maxpiv=None):
     """return (piv, f_binodal_gas, f_binodal_liquid, f_spinodal_gas, f_spinodal_liquid) at insersion works piv sampled between the critical point and maxpiv (default to 2.2*critical pressure)"""
     fc, pivc = self.critical_point(q)
     Fc = np.log(fc)
     #start sensibly above the critical point
     startp = pivc*1.1
     fm = fminbound(self.mu, fc, self.maxf(), args=(startp, q))
     fM = fminbound(lambda f: -self.pv(f, startp, q), 0, fc)
     initial_guess = np.log([0.5*fM, 0.5*(fm+self.maxf())])
     #construct the top of the GL binodal
     if maxpiv is None:
         maxpiv = startp*2
     topp = 1./np.linspace(1./startp, 1./maxpiv)
     topGL = [initial_guess]
     for piv in topp:
         topGL.append(self.binodalGL(piv, q, topGL[-1]))
     #construct the GL binodal between the starting piv and the critical point
     botp = np.linspace(startp, pivc)[:-1]
     botGL = [initial_guess]
     for piv in botp:
         botGL.append(self.binodalGL(piv, q, botGL[-1]))
     #join the two results and convert back from log
     binodal = np.vstack((
         [[pivc, fc, fc]],
         np.column_stack((botp, np.exp(botGL[1:])))[::-1],
         np.column_stack((topp, np.exp(topGL[1:])))[1:]
         ))
     #spinodal at the same pivs
     spinodal = self.spinodalGL(q, binodal[:,0])
     #join everything
     return np.column_stack((binodal, spinodal[:,1:]))
Example #23
    def loglike(self, endog, mu, scale=1.):
        """
        Loglikelihood function for Gamma exponential family distribution.

        Parameters
        ----------
        endog : array-like
            Endogenous response variable
        mu : array-like
            Fitted mean response variable
        scale : float, optional
            The default is 1.

        Returns
        -------
        llf : float
            The value of the loglikelihood function evaluated at
            (endog,mu,scale) as defined below.

        Notes
        --------
        llf = -1/scale * sum(endog/mu + log(mu) + (scale-1)*log(endog) +\
              log(scale) + scale*gammaln(1/scale))
        where gammaln is the log gamma function.
        """
        return - 1./scale * np.sum(endog/mu + np.log(mu) + (scale - 1) *
                                   np.log(endog) + np.log(scale) + scale *
                                   special.gammaln(1./scale))
def nie_all(xi1, xi2, xc1, xc2, b, s, q, rot, ys1, ys2):

    x1, x2 = xy_rotate(xi1, xi2, xc1, xc2, rot)

    wx = np.sqrt(q * q * (x1 * x1 + s * s) + x2 * x2)

    al1 = b / np.sqrt(1 - q * q) * np.arctan(x1 * np.sqrt(1 - q * q) / (wx + s))
    al2 = b / np.sqrt(1 - q * q) * np.arctanh(x2 * np.sqrt(1 - q * q) / (wx + q * q * s))

    kappa = b / (2.0 * wx)

    hx = np.sqrt((wx + s) ** 2.0 + (1 - q * q) * x1 * x1)
    phi = x1 * al1 + x2 * al2 - b * s * np.log(hx) + b * q * s * np.log((1 + q) * s)

    Kc = 1.0
    # Kc = (1.0+zl)/c*(Dl*Ds/Dls)
    td = Kc * (0.5 * ((al1) ** 2.0 + (al2) ** 2.0) - phi)
    # td = Kc*(0.5*((x1-ys1)**2.0+(x2-ys2)**2.0)-phi)

    y1 = x1 - al1
    y2 = x2 - al2

    y1, y2 = xy_rotate(y1, y2, xc1, xc2, -rot)

    # ------------------------------------------------------------------
    demon1 = ((wx + s) ** 2 + (1.0 - q * q) * x1 * x1) * wx
    demon2 = ((wx + q * q * s) ** 2 - (1.0 - q * q) * x2 * x2) * wx
    y11 = 1 - b * (wx * (wx + s) - q * q * x1 * x1) / demon1
    y22 = 1 - b * (wx * (wx + q * q * s) - x2 * x2) / demon2
    y12 = -b * x1 * x2 / demon1
    y21 = -b * x1 * x2 * q * q / demon2

    mu = 1.0 / (y11 * y22 - y12 * y21)

    return phi, td, al1, al2, kappa, mu, y1, y2
Example #25
    def loglike(self, endog, mu, scale=1.):
        """
        Loglikelihood function for inverse Gaussian distribution.

        Parameters
        ----------
        endog : array-like
            Endogenous response variable
        mu : array-like
            Fitted mean response variable
        scale : float, optional
            The default is 1.

        Returns
        -------
        llf : float
            The value of the loglikelihood function evaluated at
            (endog,mu,scale) as defined below.

        Notes
        -----
        `llf` = -(1/2.)*sum((endog-mu)**2/(endog*mu**2*scale)
                 + log(scale*endog**3) + log(2*pi))
        """
        return -.5 * np.sum((endog - mu)**2/(endog * mu**2 * scale)
                            + np.log(scale * endog**3) + np.log(2 * np.pi))
Example #26
  def _ComputeSampledLogitsNP(self, true_w, true_b, sampled_w, sampled_b,
                              hidden_acts,
                              num_true=1,
                              true_expected=None,
                              sampled_expected=None):

    batch_size, dim = hidden_acts.shape
    true_logits = np.sum(
        hidden_acts.reshape((batch_size, 1, dim)) * true_w.reshape(
            (batch_size, num_true, dim)),
        axis=2)
    true_b = true_b.reshape((batch_size, num_true))
    true_logits += true_b
    sampled_logits = np.dot(hidden_acts, sampled_w.T) + sampled_b

    if true_expected is not None:
      true_logits -= np.log(true_expected)
    if sampled_expected is not None:
      sampled_logits -= np.log(sampled_expected[np.newaxis, :])

    out_logits = np.concatenate([true_logits, sampled_logits], axis=1)
    out_labels = np.hstack((np.ones_like(true_logits) / num_true,
                            np.zeros_like(sampled_logits)))

    return out_logits, out_labels
Example #27
def _SigmoidCrossEntropyWithLogits(logits, targets):
    # logits, targets: float arrays of the same shape.
    assert logits.shape == targets.shape
    pred = 1. / (1. + np.exp(-logits))
    eps = 0.0001
    pred = np.minimum(np.maximum(pred, eps), 1 - eps)
    return -targets * np.log(pred) - (1. - targets) * np.log(1. - pred)
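A small check (not from the original test file) that the helper matches the closed form log(1 + exp(-z)) + (1 - t) * z away from the clipping region:

import numpy as np

logits = np.array([-2.0, 0.0, 3.0])
targets = np.array([0.0, 1.0, 1.0])
closed_form = np.log(1 + np.exp(-logits)) + (1 - targets) * logits
assert np.allclose(_SigmoidCrossEntropyWithLogits(logits, targets), closed_form)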
Example #28
    def __init__(self, ps=None, sigma_v=0.0, redshift=0.0, **kwargs):
        if ps is None:
            from os.path import join, dirname
            #psfile = join(dirname(__file__),"data/ps_z1.5.dat")
            #psfile = join(dirname(__file__),"data/wigglez_halofit_z1.5.dat")
            psfile = join(dirname(__file__),"data/wigglez_halofit_z0.8.dat")
            print "loading matter power file: " + psfile
            redshift = 0.8

            #pk_interp = cs.LogInterpolater.fromfile(psfile)
            pwrspec_data = np.genfromtxt(psfile)

            (log_k, log_pk) = (np.log(pwrspec_data[:,0]), \
                               np.log(pwrspec_data[:,1]))

            logpk_interp = interpolate.interp1d(log_k, log_pk,
                                                bounds_error=False,
                                                fill_value=np.min(log_pk))

            pk_interp = lambda k: np.exp(logpk_interp(np.log(k)))

            kstar = 7.0
            ps = lambda k: np.exp(-0.5 * k**2 / kstar**2) * pk_interp(k)

        self._sigma_v = sigma_v

        RedshiftCorrelation.__init__(self, ps_vv=ps, redshift=redshift)
Example #29
def test_anisotropic_power():
    for n_coeffs in [6, 15, 28, 45, 66, 91]:
        for norm_factor in [0.0005, 0.00001]:

            # Create some really simple cases:
            coeffs = np.ones((3, n_coeffs))
            max_order = calculate_max_order(coeffs.shape[-1])
            # For the case where all coeffs == 1, the ap is simply log of the
            # number of even orders up to the maximal order:
            analytic = (np.log(len(range(2, max_order + 2, 2))) -
                        np.log(norm_factor))

            answers = [analytic] * 3
            apvals = anisotropic_power(coeffs, norm_factor=norm_factor)
            assert_array_almost_equal(apvals, answers)
            # Test that this works for single voxel arrays as well:
            assert_array_almost_equal(
                anisotropic_power(coeffs[1],
                                  norm_factor=norm_factor),
                answers[1])

    # Test that even when we look at an all-zeros voxel, this
    # avoids a log-of-zero warning:
    with warnings.catch_warnings(record=True) as w:
        assert_equal(anisotropic_power(np.zeros(6)), 0)
        assert len(w) == 0
def test_der_log():
    x = np.linspace(0.001, 5, 6)
    h = 1e-15
    der1 = np.log(bicomplex(x + h * 1j, 0)).imag1 / h
    np.testing.assert_allclose(der1, 1./x)
    der2 = np.log(bicomplex(x + h * 1j, h)).imag12 / h**2
    np.testing.assert_allclose(der2, -1./x**2)
Example #31
e1 = np.zeros((len(n)))
e2 = np.zeros((len(n)))
e3 = np.zeros((len(n)))

for i in range(len(n)):
    I1[i], I2[i], I3[i] = SimpsonIntegrate(func1, 0, 1, n[i])

for i in range(len(n)):
    e1[i] = np.abs(I1[i] - solu)
    e2[i] = np.abs(I2[i] - solu)
    e3[i] = np.abs(I3[i] - solu)

xmesh = np.arange(0, 1, .001)
y = np.zeros(len(xmesh))

for i in range(len(xmesh)):
    y[i] = func1(xmesh[i])

plt.plot(xmesh, y, label='Given Integrand')
plt.legend()
plt.grid(True)
plt.show()

plt.plot(np.log(n), np.log(e1), label='Trapezoidal')
plt.plot(np.log(n), np.log(e2), label='Simpsons 1/3')
plt.plot(np.log(n), np.log(e3), label='Simpsons 3/8')
plt.grid(True)
plt.xlabel('log of n')
plt.ylabel('log of error')
plt.legend()
plt.show()
Example #32
from __future__ import division
from __future__ import absolute_import
from future import standard_library

standard_library.install_aliases()
from rlpy.Domains import PST
from rlpy.Agents import Greedy_GQ
from rlpy.Representations import *
from rlpy.Policies import eGreedy
from rlpy.Experiments import Experiment
import numpy as np
from hyperopt import hp

param_space = {  # 'discretization': hp.quniform("discretization", 5, 50, 1),
    'boyan_N0':
    hp.loguniform("boyan_N0", np.log(1e1), np.log(1e5)),
    'initial_learn_rate':
    hp.loguniform("initial_learn_rate", np.log(5e-2), np.log(1))
}


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=3019.313,
                    initial_learn_rate=0.965830):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
def train(epoch, model):

    model = parallelize(model)
    model.train()

    total = 0
    correct = 0

    end = time.time()

    for i, (x, y) in enumerate(train_loader):

        global_itr = epoch * len(train_loader) + i
        update_lr(optimizer, global_itr)

        # Training procedure:
        # for each sample x:
        #   compute z = f(x)
        #   maximize log p(x) = log p(z) - log |det df/dx|

        x = x.to(device)

        beta = min(
            1, global_itr /
            args.annealing_iters) if args.annealing_iters > 0 else 1.
        bpd, logits, logpz, neg_delta_logp = compute_loss(x, model, beta=beta)

        if args.task in ['density', 'hybrid']:
            firmom, secmom = estimator_moments(model)

            bpd_meter.update(bpd.item())
            logpz_meter.update(logpz.item())
            deltalogp_meter.update(neg_delta_logp.item())
            firmom_meter.update(firmom)
            secmom_meter.update(secmom)

        if args.task in ['classification', 'hybrid']:
            y = y.to(device)
            crossent = criterion(logits, y)
            ce_meter.update(crossent.item())

            # Compute accuracy.
            _, predicted = logits.max(1)
            total += y.size(0)
            correct += predicted.eq(y).sum().item()

        # compute gradient and do SGD step
        if args.task == 'density':
            loss = bpd
        elif args.task == 'classification':
            loss = crossent
        else:
            if not args.scale_dim:
                bpd = bpd * (args.imagesize * args.imagesize * im_dim)
            loss = bpd + crossent / np.log(
                2)  # Change cross entropy from nats to bits.
        loss.backward()

        if global_itr % args.update_freq == args.update_freq - 1:

            if args.update_freq > 1:
                with torch.no_grad():
                    for p in model.parameters():
                        if p.grad is not None:
                            p.grad /= args.update_freq

            grad_norm = torch.nn.utils.clip_grad.clip_grad_norm_(
                model.parameters(), 1.)
            if args.learn_p: compute_p_grads(model)

            optimizer.step()
            optimizer.zero_grad()
            update_lipschitz(model)
            ema.apply()

            gnorm_meter.update(grad_norm)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            s = ('Epoch: [{0}][{1}/{2}] | Time {batch_time.val:.3f} | '
                 'GradNorm {gnorm_meter.avg:.2f}'.format(
                     epoch,
                     i,
                     len(train_loader),
                     batch_time=batch_time,
                     gnorm_meter=gnorm_meter))

            if args.task in ['density', 'hybrid']:
                s += (
                    ' | Bits/dim {bpd_meter.val:.4f}({bpd_meter.avg:.4f}) | '
                    'Logpz {logpz_meter.avg:.0f} | '
                    '-DeltaLogp {deltalogp_meter.avg:.0f} | '
                    'EstMoment ({firmom_meter.avg:.0f},{secmom_meter.avg:.0f})'
                    .format(bpd_meter=bpd_meter,
                            logpz_meter=logpz_meter,
                            deltalogp_meter=deltalogp_meter,
                            firmom_meter=firmom_meter,
                            secmom_meter=secmom_meter))

            if args.task in ['classification', 'hybrid']:
                s += ' | CE {ce_meter.avg:.4f} | Acc {0:.4f}'.format(
                    100 * correct / total, ce_meter=ce_meter)

            logger.info(s)
        if i % args.vis_freq == 0:
            visualize(epoch, model, i, x)

        del x
        torch.cuda.empty_cache()
        gc.collect()
Example #34
for i in range(len(names)):
    aa = y_pres[i] - array_train[:, 0]
    loss = np.dot(weights, np.multiply(aa, aa))
    losses.append(loss)

min_index = np.argmin(losses)
print('****************Update precedure****************')
iteration = 1000
w = []
index = [min_index]
for iters in range(iteration):

    epsilon = np.dot(weights,
                     np.abs(y_pres[index[-1]] - array_train[:, 0]) / 2)
    w.append(0.5 * np.log(1 / epsilon - 1) + 0.3)
    if w[-1] < 0.01:
        break
    temp = np.multiply(
        weights,
        np.exp(-w[-1] * np.multiply(y_pres[index[-1]], array_train[:, 0])))
    weights = temp / np.sum(temp)
    losses = []

    for i in range(len(names)):
        aa = y_pres[i] - array_train[:, 0]
        loss = np.dot(weights, np.multiply(aa, aa))
        losses.append(loss)

    index.append(np.argmin(losses))
    print('Iteration = ', iters + 1, '  Index = ', index[-2],
Example #35
def main():
    # gt = sys.argv[1]
    lmin = -100  # int(sys.argv[2])
    lmax = 200  # int(sys.argv[3])
    fpaths = sys.argv[1:-1]
    out_pdf = sys.argv[-1]

    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    ax2 = fig.add_subplot(212)  # ax1.twinx()

    tools = ['MALVA', 'GATK', 'BCFtools', 'discoSnp++']
    colors = ['red', 'green', 'blue', 'orange']
    i = 0
    for fpath in fpaths:
        tps = {}
        fps = {}
        tots = {}
        for line in open(fpath):
            if line[0:4] == 'chr,':
                continue
            chrom, _gt, l, tp, fp, tot = line.strip('\n').split(',')
            if tot == "0":  # These are FPs, we don't need them
                continue
            # if _gt != gt:
            #     continue
            l = int(l)
            if lmin <= l <= lmax:
                tps[l] = tps[l] + int(tp) if l in tps else int(tp)
                fps[l] = fps[l] + int(fp) if l in fps else int(fp)
                tots[l] = tots[l] + int(tot) if l in tots else int(tot)

        tots_mod = {l: tots[l] + 1 for l in tots}
        ax1.scatter(sorted(tps.keys()),
                    [tps[l] / tots[l] for l in sorted(tps.keys())],
                    color=colors[i],
                    label=tools[i],
                    linewidths=0.0001,
                    alpha=0.75,
                    s=23)
        if i == 0:
            ax2.bar(sorted(tps.keys()),
                    [np.log(tots_mod[l]) for l in sorted(tps.keys())],
                    color="grey")
        i += 1
    # plt.xticks(np.arange(min(Xs), max(Xs)+1, 25))

    ax1.legend(loc=4, bbox_to_anchor=(1, -0.17), ncol=4)
    #ax1.set_title("Recall on {} indels".format(gt))
    ax1.get_xaxis().set_visible(False)
    ax1.set_ylabel("Recall")
    ax2.set_xlabel("Indel length (#bp)")
    ax2.set_ylabel("#indels (log scale)")
    ax2.set_ylim(0, 12)
    # xlabel('Item (s)')
    # ylabel('Value')
    # title('Python Line Chart: Plotting numbers')
    # grid(True)
    plt.subplots_adjust(top=0.99, bottom=0.09, right=0.99, left=0.07)
    DPI = fig.get_dpi()
    fig.set_size_inches(1366.0 / float(DPI), 768.0 / float(DPI))
    fig.savefig(out_pdf, dpi=DPI)
Example #36
from sklearn.model_selection import train_test_split
import numpy as np
import scipy as sp
import h5py
fname = "C:/Users/AlessioB/Desktop/REFTEP ANN/sub-1_band-mu_iplv.mat"
mat1 = h5py.File(fname)
fname = "C:/Users/AlessioB/Desktop/REFTEP ANN/sub-1_band-betalow_iplv.mat"
mat2 = h5py.File(fname)
fname = "C:/Users/AlessioB/Desktop/REFTEP ANN/sub-1_band-betahigh_iplv.mat"
mat3 = h5py.File(fname)

X = np.hstack((mat1['iPLV'].value[:, ::20], mat2['iPLV'].value[:, ::20],
               mat3['iPLV'].value[:, ::20]))

Y = mat1['AmpsMclean'].value

Y = np.log(Y.T)
#Y=sp.stats.zscore(Y)
#plt.hist(Y)

Y = Y[:, 0]
threshold = np.median(Y)
Y[Y < threshold] = 0
Y[Y >= threshold] = 1

X = X[:, np.std(X, 0) > 0]
X = np.log(np.abs(X) / (1 - np.abs(X)))
#X=sp.stats.zscore(X)

#pca = PCA(n_components=2)
#pca.fit(X.T)
#Xred=pca.components_.T
Example #37
def func1(x):
    if (x == 0):
        return 1
    else:
        return ((np.log(1 + x)) / x)
Example #38
File: my_nb.py Project: myaGr/CS229
    output = nb_test(testMatrix, state)
    
    return evaluate(output, testCategory)

trainMatrix, tokenlist, trainCategory = readMatrix('MATRIX.TRAIN')
testMatrix, tokenlist, testCategory = readMatrix('MATRIX.TEST')

state = nb_train(trainMatrix, trainCategory)
output = nb_test(testMatrix, state)

evaluate(output, testCategory)

#problem b
b=[]
for i in range(1448):
    b.append((i,np.log(state[i][1])-np.log(state[i][0])))
    
b.sort(key=lambda i:i[-1],reverse=True)
key = b[:5]

word = []
for i in key:
    word.append(tokenlist[i[0]])
    
print(word)

#problem c
size = ['.50','.100','.200','.400','.800','.1400']
size1 = [50, 100, 200, 400, 800, 1400]
train = "MATRIX.TRAIN"
error = []
Example #39
def limLog(self, x):
    # 1e-1000 underflows to 0.0 in float64, which would let np.log return -inf;
    # use a floor that is actually representable.
    MINLOG = 1e-300
    return np.log(np.maximum(x, MINLOG))
def LiftedCondensationLevelTemp(init_temp_k, dew_init_temp_k): 
    if (init_temp_k<100.):
        init_temp_k = init_temp_k +273.15
    if (dew_init_temp_k<100.):
        dew_init_temp_k = dew_init_temp_k +273.15
    return (1./(1./(dew_init_temp_k-56) + log(init_temp_k/dew_init_temp_k)/800.)) + 56
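A standalone sketch of the same clamped log as limLog (hypothetical helper, no self), illustrating why the floor must be representable in float64:

import numpy as np

def lim_log(x, minlog=1e-300):
    # Clamp before the log so zeros and denormals do not produce -inf.
    return np.log(np.maximum(x, minlog))

print(lim_log(np.array([0.0, 1e-320, 2.0])))   # ~[-690.8, -690.8, 0.693]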
Example #41
def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config):
    """Given the anchors and GT boxes, compute overlaps and identify positive
    anchors and deltas to refine them to match their corresponding GT boxes.

    anchors: [num_anchors, (y1, x1, y2, x2)]
    gt_class_ids: [num_gt_boxes] Integer class IDs.
    gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)]

    Returns:
    rpn_match: [N] (int32) matches between anchors and GT boxes.
               1 = positive anchor, -1 = negative anchor, 0 = neutral
    rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    """
    ## RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral
    rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32)
    ## RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))]
    rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4))

    ## Handle COCO crowds
    ## A crowd box in COCO is a bounding box around several instances. Exclude
    ## them from training. A crowd box is given a negative class ID.
    no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool)
    
    ## Compute overlaps [num_anchors, num_gt_boxes]
    overlaps = compute_overlaps(anchors, gt_boxes)

    ## Match anchors to GT Boxes
    ## If an anchor overlaps a GT box with IoU >= 0.7 then it's positive.
    ## If an anchor overlaps a GT box with IoU < 0.3 then it's negative.
    ## Neutral anchors are those that don't match the conditions above,
    ## and they don't influence the loss function.
    ## However, don't keep any GT box unmatched (rare, but happens). Instead,
    ## match it to the closest anchor (even if its max IoU is < 0.3).
    #
    ## 1. Set negative anchors first. They get overwritten below if a GT box is
    ## matched to them. Skip boxes in crowd areas.
    anchor_iou_argmax = np.argmax(overlaps, axis=1)
    anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax]
    rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1
    ## 2. Set an anchor for each GT box (regardless of IoU value).
    ## TODO: If multiple anchors have the same IoU match all of them
    gt_iou_argmax = np.argmax(overlaps, axis=0)
    rpn_match[gt_iou_argmax] = 1
    ## 3. Set anchors with high overlap as positive.
    rpn_match[anchor_iou_max >= 0.7] = 1

    ## Subsample to balance positive and negative anchors
    ## Don't let positives be more than half the anchors
    ids = np.where(rpn_match == 1)[0]
    extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2)
    if extra > 0:
        ## Reset the extra ones to neutral
        ids = np.random.choice(ids, extra, replace=False)
        rpn_match[ids] = 0
    ## Same for negative proposals
    ids = np.where(rpn_match == -1)[0]
    extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE -
                        np.sum(rpn_match == 1))
    if extra > 0:
        ## Reset the extra ones to neutral
        ids = np.random.choice(ids, extra, replace=False)
        rpn_match[ids] = 0

    ## For positive anchors, compute shift and scale needed to transform them
    ## to match the corresponding GT boxes.
    ids = np.where(rpn_match == 1)[0]
    ix = 0  ## index into rpn_bbox
    ## TODO: use box_refinement() rather than duplicating the code here
    for i, a in zip(ids, anchors[ids]):
        ## Closest gt box (it might have IoU < 0.7)
        gt = gt_boxes[anchor_iou_argmax[i]]

        ## Convert coordinates to center plus width/height.
        ## GT Box
        gt_h = gt[2] - gt[0]
        gt_w = gt[3] - gt[1]
        gt_center_y = gt[0] + 0.5 * gt_h
        gt_center_x = gt[1] + 0.5 * gt_w
        ## Anchor
        a_h = a[2] - a[0]
        a_w = a[3] - a[1]
        a_center_y = a[0] + 0.5 * a_h
        a_center_x = a[1] + 0.5 * a_w

        ## Compute the bbox refinement that the RPN should predict.
        rpn_bbox[ix] = [
            (gt_center_y - a_center_y) / a_h,
            (gt_center_x - a_center_x) / a_w,
            np.log(gt_h / a_h),
            np.log(gt_w / a_w),
        ]
        ## Normalize
        rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV
        ix += 1

    return rpn_match, rpn_bbox
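For context, a minimal sketch of undoing one (dy, dx, log(dh), log(dw)) delta, i.e. the inverse of the refinement computed above; the helper name and the [0.1, 0.1, 0.2, 0.2] normalization are illustrative assumptions mirroring config.RPN_BBOX_STD_DEV:

import numpy as np

def apply_box_delta(anchor, delta, bbox_std_dev=np.array([0.1, 0.1, 0.2, 0.2])):
    # anchor: (y1, x1, y2, x2); delta: normalized (dy, dx, log(dh), log(dw)).
    dy, dx, dlogh, dlogw = delta * bbox_std_dev
    h, w = anchor[2] - anchor[0], anchor[3] - anchor[1]
    cy, cx = anchor[0] + 0.5 * h, anchor[1] + 0.5 * w
    cy, cx = cy + dy * h, cx + dx * w
    h, w = h * np.exp(dlogh), w * np.exp(dlogw)
    return np.array([cy - 0.5 * h, cx - 0.5 * w, cy + 0.5 * h, cx + 0.5 * w])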
Example #42
            step_all, probabilities = random_neighbour_avoiding(random_walks)
        else:
            step_all, probabilities = random_neighbour(random_walks[step])

        random_walks.append(step_all)
        weights.append(
            generate_weights(random_walks, weights[step], probabilities))

        if do_resample:
            random_walks, weights[-1] = resample(random_walks, weights[-1])

        print("-", end="")

    return random_walks, weights


walks, weights = init_walk(10, 20, 10**2, self_avoiding=True, do_resample=True)

print("")

cn = np.mean(weights, axis=1)
length = len(cn)

y = np.log(cn).T
X = np.hstack((np.ones(length), np.arange(length) + 1,
               np.log(np.arange(length) + 1))).reshape(3, length).T
theta = np.linalg.inv(X.T @ X) @ X.T @ y
print(np.e**theta)

# %%
# Here we check how many transactions in `train_transaction` / `test_transaction` have a matching row in the corresponding identity table
print(np.sum(train_transaction.index.isin(train_identity.index.unique())))
print(np.sum(test_transaction.index.isin(test_identity.index.unique())))
train_transaction['TransactionDT'].head()
train_transaction['TransactionDT'].shape[0], train_transaction[
    'TransactionDT'].nunique()
train_transaction['TransactionDT'].value_counts().head(10)
fig, ax = plt.subplots(1, 2, figsize=(18, 4))

time_val = train_transaction['TransactionDT'].values

sns.distplot(time_val, ax=ax[0], color='r')
ax[0].set_title('Distribution of TransactionDT', fontsize=14)
ax[1].set_xlim([min(time_val), max(time_val)])

sns.distplot(np.log(time_val), ax=ax[1], color='b')
ax[1].set_title('Distribution of LOG TransactionDT', fontsize=14)
ax[1].set_xlim([min(np.log(time_val)), max(np.log(time_val))])

plt.show()
fig, ax = plt.subplots(1, 2, figsize=(18, 4))

time_val = train_transaction.loc[train_transaction['isFraud'] ==
                                 1]['TransactionDT'].values

sns.distplot(np.log(time_val), ax=ax[0], color='r')
ax[0].set_title('Distribution of LOG TransactionDT, isFraud=1', fontsize=14)
ax[1].set_xlim([min(np.log(time_val)), max(np.log(time_val))])

time_val = train_transaction.loc[train_transaction['isFraud'] ==
                                 0]['TransactionDT'].values
Example #44
def num_steps(self):
    if self._num_steps is None:
        return 2 * int(np.round(16.0 / np.log(np.abs(self.step_ratio)))) + 1
    return self._num_steps
Example #45
            sent_vec.append(1)
        else:
            sent_vec.append(0)
    sentence_vectors.append(sent_vec)

sentence_vectors = np.asarray(sentence_vectors)

# tf-idf: N = total number of documents

N = 1000
feture = np.zeros([1000, 200])
df = np.sum(sentence_vectors, axis=0)
for i in range(1000):
    for j in range(200):
        #        print(np.log(N/df[j]))
        feture[i, j] = (float)(sentence_vectors[i, j] * np.log(N / df[j]))


def pca(data, k):
    cov_data = np.cov(np.transpose(data))
    eig_val, eig_vector = np.linalg.eig(cov_data)

    def eigen_sort(value, vector):
        idx = value.argsort()[::-1]
        eigenValues = value[idx]
        eigenVectors = vector[:, idx]
        return (eigenValues, eigenVectors)

    eig_vals, eig_vectors = eigen_sort(eig_val, eig_vector)

    def final_projection(eigen_matrix, x, k):
    def cross_entropy_loss(self,y,y_pred,l2_penalty):
        
        cross_entropy = (
            (np.sum(np.square(self.ip_to_hl.w)) + np.sum(np.square(self.hl_to_op.w)))
            * (l2_penalty / (2 * len(y)))
            + (np.dot(np.transpose(y), np.log(y_pred + 1e-12))
               + np.dot((1 - np.transpose(y)), np.log(1 - y_pred + 1e-12)))
        )
        # print(cumulative)

        avg_batch_cross_entropy_loss = (-1.0)* np.sum(cross_entropy)/len(y)
        
        return avg_batch_cross_entropy_loss
Example #47
    def __load_dataset__(self, genome_scores, genome_tags, tags_applies, movies_df):
        tags_applies[TAG] = tags_applies[TAG].str.lower()
        values_with_pop = []
        for tid, t in genome_tags.as_matrix():
            tags_a = tags_applies.loc[tags_applies[TAG] == t.lower()]
            tags_total = np.array(tags_a[MOVIE_ID])
            values_with_pop.append([tid, t, len(tags_total)])
            self.logger.info(
                "Tag popularity for tag {} is: {}".format(t, len(tags_total))
            )
            if len(tags_total) == 0:
                self.logger.info((tid, t))
                self.logger.info(
                    "Tag popularity for tag {} is: {}".format(t, len(tags_total))
                )
                values_with_pop.append([tid, t, 2])
        cols = list(genome_tags.columns)
        cols.append(TAG_POPULARITY)
        genome_tags = pd.DataFrame(values_with_pop, columns=cols)
        doc_freq = []
        for i, tag in enumerate(genome_tags.values):
            df = genome_scores.loc[genome_scores["tagId"] == tag[0]]
            freq = np.sum(np.array(df["relevance"]) > 0.5)
            if freq == 0 or freq == 1:
                freq = 2
                self.logger.info("Document frequency for tag {} is: zero".format(tag))
            doc_freq.append(freq)
        genome_tags[DOC_FREQUENCY] = doc_freq
        genome_tags.to_csv(self.tags_info_file, index=False)
        self.logger.info(
            "Done loading the tag popularity and doc frequency for the tags"
        )
        self.weights = np.log(np.array(genome_tags[TAG_POPULARITY])) / np.log(
            np.array(genome_tags[DOC_FREQUENCY])
        )

        objects = []
        tags_rel = genome_scores.as_matrix()
        movie_ids = np.unique(np.array(genome_scores[MOVIE_ID]))
        for i, movie_id in enumerate(movie_ids):
            a = i * self.n_features
            b = (i + 1) * self.n_features
            objects.append(tags_rel[a:b, 2])
        objects = np.array(objects)
        movies_df = movies_df[movies_df[MOVIE_ID].isin(movie_ids)]

        for i, tag in enumerate(genome_tags.values):
            movies_df[tag[1]] = objects[:, i]
        movies_df.to_csv(self.movies_file, index=False)

        self.logger.info("Done loading the features for the movies")

        num_of_movies = movie_ids.shape[0]
        combinations_list = np.array(list(combinations(range(num_of_movies), 2)))
        similarity_matrix = dict()

        features = movies_df.as_matrix()[:, 3:]

        for i, j in combinations_list:
            similarity_matrix[get_key_for_indices(i, j)] = weighted_cosine_similarity(
                self.weights
            )(features[i], features[j])
            self.logger.info(
                "Calculating similarity {},{}, {}".format(
                    i, j, similarity_matrix[get_key_for_indices(i, j)]
                )
            )

        for i in range(num_of_movies):
            similarity_matrix[get_key_for_indices(i, i)] = 1.0

        series = pd.Series(similarity_matrix)
        matrix_df = pd.DataFrame(
            {"col_major_index": series.index, "similarity": series.values}
        )
        matrix_df.to_csv(self.similarity_matrix_file, index=False)
        self.logger.info(
            "Done calculating the similarity matrix stored at: {}".format(
                self.similarity_matrix_file
            )
        )
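The weighted_cosine_similarity helper is not shown above; a plausible sketch consistent with how it is called (a weight vector producing a similarity function of two feature vectors) could be:

import numpy as np

def weighted_cosine_similarity(weights):
    # One common definition: sum(w*a*b) / sqrt(sum(w*a^2) * sum(w*b^2));
    # the project's actual helper may differ.
    def similarity(a, b):
        num = np.sum(weights * a * b)
        den = np.sqrt(np.sum(weights * a * a) * np.sum(weights * b * b))
        return num / den
    return similarity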
Example #48
def multimeter_error(value,
                     scale,
                     multimeter_type,
                     measure_type,
                     ignore_gain=False,
                     ignore_digit=False):
    """
    value is the value measured by the multimeter
    scale is the "end of scale" given by the multimeter, 
        or the v/div on the oscilloscope
    multimeter_type is:
        'a' for agilent
        'm' for metrix
        'o' for oscilloscope
    measure_type is:
        'a' for current
        'v' for tension
        'ohm' for resistance
        'c' for capacity
        's' for time
     
    Returns the error of the measure.
    """
    if (multimeter_type == 'a'):
        if (measure_type == 'a'):
            scale_array = np.array([6 * 10**(-5), 6 * 10**(-4), 6, 10])
            resolution_array = float(10)**(np.array([-8, -7, -3, -2]))
            percent_accuracy = np.array([1, 1, 1, 1])
            digit_accuracy = np.array([2, 2, 3, 3])
        elif (measure_type == 'v'):
            scale_array = 6 * np.array([10**(-1), 1, 10, 100])
            resolution_array = float(10)**(np.array([-4, -3, -2, -1]))
            percent_accuracy = 0.5 * np.array([1, 1, 1, 1])
            digit_accuracy = 2 * np.array([1, 1, 1, 1])
        elif (measure_type == 'ohm'):
            scale_array = 6 * float(10)**(np.array([2, 3, 4, 5, 6, 7]))
            resolution_array = float(10)**(np.array([2, 3, 4, 5, 6, 7]) - 3)
            percent_accuracy = np.array([0.9, 0.9, 0.9, 0.9, 0.9, 1.5])
            digit_accuracy = 3 * np.array([1, 1, 1, 1, 1, 1])
        else:
            print(f'{measure_type} is not a valid measure type')
            return (None)
    elif (multimeter_type == 'm'):
        if (measure_type == 'v'):
            scale_array = np.array([1, 10, 100, 1000])
            resolution_array = float(10)**(np.array([-5, -4, -3, -2]))
            percent_accuracy = np.array([0.05, 0.03, 0.03, 0.035])
            digit_accuracy = 8 * np.array([1, 1, 1, 1])
        elif (measure_type == 'a'):
            scale_array = float(10)**(np.array([-3, -2, -1, 0, 1]))
            resolution_array = float(10)**(np.array([-3, -2, -1, 0, 1]) - 5)
            percent_accuracy = np.array([0.1, 0.08, 0.08, 0.15, 0.5])
            digit_accuracy = np.array([15, 8, 8, 8, 15])
        elif (measure_type == 'ohm'):
            scale_array = float(10)**(np.array([3, 4, 5, 6, 7, 8]))
            resolution_array = float(10)**(np.array([3, 4, 5, 6, 7, 8]) - 5)
            percent_accuracy = np.array([0.1, 0.07, 0.07, 0.07, 1, 3])
            digit_accuracy = np.array([8, 8, 8, 8, 80, 80])
        elif (measure_type == 'c'):
            scale_array = float(10)**(np.array(
                [-9, -8, -7, -6, -5, -4, -3, -2]))
            resolution_array = float(10)**(
                np.array([-9, -8, -7, -6, -5, -4, -3, -2]) - 3)
            percent_accuracy = np.array([2.5, 1, 1, 1, 1, 1, 1, 1.5])
            digit_accuracy = np.array([15, 8, 8, 10, 10, 10, 15, 15])
        else:
            print(f'{measure_type} is not a valid measure type')
            return (None)
    elif (multimeter_type == 'o'):
        if (measure_type == 'v'):
            mag_ord = float(10)**(np.arange(-9, 2))
            scale_array = np.concatenate((mag_ord, 2 * mag_ord, 5 * mag_ord))
            resolution_array = scale_array / 10
            percent_accuracy = 0.01 * np.ones(len(scale_array))
            digit_accuracy = np.ones(len(scale_array))
        elif (measure_type == 's'):
            scale_array = float(10)**(np.arange(-9, 2))
            resolution_array = scale_array / 10
            percent_accuracy = 3 * np.ones(len(scale_array))
            digit_accuracy = np.ones(len(scale_array))
        else:
            print(f'{measure_type} is not a valid measure type')
            return (None)
    else:
        print(f'{multimeter_type} is not a valid multimeter type')
        return (None)

    if (not (len(scale_array) == len(resolution_array) == len(percent_accuracy)
             == len(digit_accuracy))):
        print('Error in hardcoded values')
        return (None)

    tolerance = 0.01
    index = -1
    for i in range(len(scale_array)):
        if (np.abs(np.log(scale_array[i]) - np.log(scale)) < tolerance):
            index = i
    if (index == -1):
        print(f'{scale} is an invalid scale')
        return (None)

    distribution_factor = 1 / np.sqrt(3)

    if (ignore_gain == False and ignore_digit == False):
        error = np.sqrt((percent_accuracy[index] * value / 100)**2 +
                        (digit_accuracy[index] *
                         resolution_array[index])**2) * distribution_factor
    elif (ignore_gain == True and ignore_digit == False):
        error = digit_accuracy[index] * resolution_array[
            index] * distribution_factor
    elif (ignore_gain == False and ignore_digit == True):
        error = percent_accuracy[index] * value / 100 * distribution_factor
    else:
        print('Cannot ignore both errors')
        return (None)

    return (error)
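A usage sketch: 1.234 V read on the Agilent meter at the 6 V scale (the 1/sqrt(3) factor converts the rectangular tolerance to a standard uncertainty):

err = multimeter_error(1.234, 6, 'a', 'v')
print(err)    # ~0.0037 V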
Example #49
def interpolated_broadening(sigma=None, points=None, bins=None,
                            center=None, weights=1, is_hist=False, limit=3,
                            function='gaussian', spacing='sqrt2'):
    """Return a fast estimate of frequency-dependent broadening

    Consider a spectrum of two peaks, in the case where (as in indirect-geometry INS) the peak width
    increases with frequency.

       |
       |        |
       |        |
    -----------------

    In the traditional scheme we broaden each peak individually and combine:

       *                      |                       *
       *        |       +     |        *       =      *        *
      * *       |             |      *   *           * *     *   *
    -----------------      -----------------       -----------------

    Instead of summing over broadening kernels evaluated at each peak, the approximation obtains
    a spectrum corresponding to the following scheme:

    - For each sigma value, the entire spectrum is convolved with an
      appropriate-width broadening function
    - At each frequency, the final spectrum value is drawn from the spectrum
      broadened with corresponding sigma.

    Compared to a summation over broadened peaks, this method introduces an
    asymmetry to the spectrum about each peak.

       *                    *                    *                         *
       *        *          * *       *          * *       *      -->       **       *
      * *       *         *   *     * *       *     *   *   *             *  *     *  *
    ----------------- ,  ----------------- ,  -----------------         -----------------

    This asymmetry should be tolerable as long as the broadening function
    varies slowly in frequency relative to its width.

    The benefit of this scheme is that we do not need to evaluate the
    convolution at every sigma value; nearby spectra can be interpolated.
    Trial-and-error finds that with optimal mixing the error of a Gaussian
    approximated by mixing a wider and a narrower Gaussian is ~5% when the sigma
    range is a factor of 2, and ~1% when the sigma range is a factor of sqrt(2).
    A pre-optimised transfer function can be used for a fixed ratio between the
    reference functions.

    :param sigma: widths of broadening functions (passed to "sigma" argument of function)
    :type sigma: float or Nx1 array
    :param bins: sample bins for function evaluation. This _must_ be evenly-spaced.
    :type bins: 1-D array
    :param points: regular grid of points for which function should be evaluated.
    :type points: 1-D array
    :param center: centers of broadening functions
    :type center: float or Nx1 array
    :param weights: weights of peaks for summation
    :type weights: float or array corresponding to "center"
    :param is_hist:
        If "weights" is already a histogram corresponding to evenly-spaced
        frequencies, set this to True to avoid a redundant binning operation.
    :type is_hist: bool
    :param function: broadening function; currently only 'gaussian' is accepted
    :type function: str
    :param limit: range (as multiple of sigma) for cutoff
    :type limit: float
    :param spacing:
        Spacing factor between Gaussian samples on log scale. This is not a
        free parameter as a pre-computed curve is used for interpolation.
        Allowed values: '2', 'sqrt2', with error ~5% and ~1% respectively.
    :type spacing: str

    :returns: (points, spectrum)
    :returntype: (1D array, 1D array)

    """
    mix_functions = {'gaussian': {'2': {'lower': [-0.1873, 1.464, -4.079, 3.803],
                                        'upper': [0.2638, -1.968, 5.057, -3.353]},
                                  'sqrt2': {'lower': [-0.6079, 4.101, -9.632, 7.139],
                                            'upper': [0.7533, -4.882, 10.87, -6.746]}}}
    log_bases = {'2': 2, 'sqrt2': np.sqrt(2)}
    log_base = log_bases[spacing]

    # Sample on appropriate log scale: log_b(x) = log(x) / log(b)
    n_kernels = int(np.ceil(np.log(max(sigma) / min(sigma)) / np.log(log_base))) + 1

    if n_kernels == 1:  # Special case: same width everywhere, only need one kernel
        sigma_samples = np.array([min(sigma)])
    else:
        sigma_samples = log_base**np.arange(n_kernels) * min(sigma)

    bin_width = bins[1] - bins[0]

    # Get set of convolved spectra for interpolation
    if is_hist:
        hist = weights
    else:
        hist, _ = np.histogram(center, bins=bins, weights=weights, density=False)
    freq_range = limit * max(sigma)  # kernel cutoff range, as a multiple of sigma
    kernel_npts_oneside = np.ceil(freq_range / bin_width)

    if function == 'gaussian':
        kernels = mesh_gaussian(sigma=sigma_samples[:, np.newaxis],
                                points=np.arange(-kernel_npts_oneside, kernel_npts_oneside + 1, 1) * bin_width,
                                center=0)
    else:
        raise ValueError('"{}" kernel not supported for "interpolate" broadening method.'.format(function))

    spectra = np.array([convolve(hist, kernel, mode='same') for kernel in kernels])

    # Interpolate with parametrised relationship
    sigma_locations = np.searchsorted(sigma_samples, sigma) # locations in sampled values of points from sigma
    spectrum = np.zeros_like(points)
    # Samples with sigma == min(sigma) are a special case: copy directly from spectrum
    spectrum[sigma_locations==0] = spectra[0, sigma_locations==0]

    for i in range(1, len(sigma_samples)):
        masked_block = (sigma_locations == i)
        sigma_factors = sigma[masked_block] / sigma_samples[i - 1]
        lower_mix = np.polyval(mix_functions[function][spacing]['lower'], sigma_factors)
        upper_mix = np.polyval(mix_functions[function][spacing]['upper'], sigma_factors)

        spectrum[masked_block] = (lower_mix * spectra[i-1, masked_block]
                                  + upper_mix * spectra[i, masked_block])

    return points, spectrum
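The interpolation idea can be checked independently of the helpers used above (mesh_gaussian, convolve). A minimal sketch, using naive linear mixing in log(sigma) rather than the pre-fitted polynomial weights, shows that two bracketing Gaussians on a sqrt(2)-spaced grid already approximate an intermediate width to within a few percent:

import numpy as np

def gaussian(x, sigma):
    return np.exp(-x**2 / (2 * sigma**2)) / (sigma * np.sqrt(2 * np.pi))

x = np.linspace(-5, 5, 2001)
sigma = 1.3                      # target width, between two reference widths
log_base = np.sqrt(2)

# Bracket sigma with reference widths on the log-spaced grid
k = np.floor(np.log(sigma) / np.log(log_base))
sigma_lo, sigma_hi = log_base**k, log_base**(k + 1)

# Naive linear mixing in log(sigma); the pre-optimised cubics above do better
frac = (np.log(sigma) - np.log(sigma_lo)) / (np.log(sigma_hi) - np.log(sigma_lo))
approx = (1 - frac) * gaussian(x, sigma_lo) + frac * gaussian(x, sigma_hi)

max_err = np.max(np.abs(approx - gaussian(x, sigma)))
print("max pointwise error:", max_err)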
Example #50
0
def get_network_numpool(patch_size, maxpool_cap=999, min_feature_map_size=4):
    network_numpool_per_axis = np.floor([np.log(i / min_feature_map_size) / np.log(2) for i in patch_size]).astype(int)
    network_numpool_per_axis = [min(i, maxpool_cap) for i in network_numpool_per_axis]
    return network_numpool_per_axis
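A quick illustrative call with a made-up 3D patch size: each axis can be halved floor(log2(size / min_feature_map_size)) times before the feature map drops below the minimum edge length.

print(get_network_numpool([160, 192, 48]))   # -> [5, 5, 3]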
Example #51
0
def bubble_plot(df,
                x,
                y,
                ordered_x_values=None,
                ordered_y_values=None,
                bins_x=10,
                bins_y=10,
                fontsize=16,
                figsize=(15, 10),
                maximal_bubble_size=5000,
                normalization_by_all=False,
                log=False):
    """
    :param df: dataframe
    :param x:  name of first numerical/categorical field (string) (for x-axis)
    :param y: name of second numerical/categorical field (string) (for y-axis)
    :param ordered_x_values: the values we would like to map from x categorical variable 
    according to the order we would like to present them
    :param ordered_y_values: the values we would like to map from the y categorical variable 
    according to the order we would like to present them
    :param bins_x: the bins for x values if x is numberic
    :param bins_y: the bins for y values if y is numberic
    :param normalization_by_all: True - shows joint distribution p(x,y), False - shows conditional distribution p(y|x)
    :param maximal_bubble_size: if the bubbles are too big or too small this is the parameter you should change!
    :param log: whether to apply log on the count (influence the size of the bubbles)
    :return: nice bubble plot :)
    """
    plt.figure(figsize=figsize)
    x_is_numeric = df[x].dtype in (float, int)
    y_is_numeric = df[y].dtype in (float, int)
    count_table = pd.concat([
        pd.cut(df[x], bins=bins_x) if x_is_numeric else df[x],
        pd.cut(df[y], bins=bins_y) if y_is_numeric else df[y]
    ],
                            axis=1)
    count_table = count_table.groupby(x)[y].value_counts().unstack().fillna(0)
    ordered_x_values = count_table.index.values if ordered_x_values is None else ordered_x_values
    ordered_y_values = count_table.columns if ordered_y_values is None else ordered_y_values
    if normalization_by_all:
        count_table /= count_table.sum().sum()
    else:
        for col in count_table.columns:
            count_table[col] /= count_table[col].sum()
    if log:
        count_table = np.log(count_table)
        maximal_bubble_size /= 2
    size_factor = maximal_bubble_size / count_table.max().max()
    count_table_long = pd.melt(count_table.reset_index(), id_vars=x)
    x_values_dict = {x:i for i, x in enumerate(ordered_x_values)} \
        if not x_is_numeric else {xx:get_point(xx) for xx in ordered_x_values}
    y_values_dict = {x:i for i, x in enumerate(ordered_y_values)} \
        if not y_is_numeric else {xx: get_point(xx) for xx in ordered_y_values}
    count_table_long[x] = count_table_long[x].map(x_values_dict)
    count_table_long[y] = count_table_long[y].map(y_values_dict)
    xticks = np.arange(count_table.shape[0]) if not x_is_numeric else [
        get_point(xx) for xx in ordered_x_values
    ]
    yticks = np.arange(count_table.shape[1]) if not y_is_numeric else [
        get_point(xx) for xx in ordered_y_values
    ]
    xticklabels = ordered_x_values if not x_is_numeric else [
        get_point(xx) for xx in ordered_x_values
    ]
    yticklabels = ordered_y_values if not y_is_numeric else [
        get_point(xx) for xx in ordered_y_values
    ]
    plt.scatter(count_table_long[x],
                count_table_long[y],
                s=size_factor * count_table_long['value'],
                c=count_table_long['value'],
                cmap='cool')
    plt.xticks(xticks, xticklabels, fontsize=fontsize)
    plt.yticks(yticks, yticklabels, fontsize=fontsize)
    plt.xlabel(x, fontsize=fontsize)
    plt.ylabel(y, fontsize=fontsize)
    plt.title("{} vs {} ".format(y, x), fontsize=fontsize + 4)
Example #52
0
def trading(stock_1,stock_2,train=60,trade=1,delta=1/252, interest= 0.02):
    ts_1 = np.asarray(test_df.iloc[stock_1,:])
    ts_2 = np.asarray(test_df.iloc[stock_2,:])
    price_1 = np.asarray(test_price_df.iloc[stock_1,:])
    price_2 = np.asarray(test_price_df.iloc[stock_2,:])
    t=train
    initial_wealth = 1.
    duration = len(ts_1)-train+1
    q_stock_1 = np.zeros(duration)
    q_stock_2 = np.zeros(duration)
    wealth = np.full(duration,initial_wealth)
    bank = wealth.copy()  # copy, so the cash balance and total wealth are tracked separately
    #print(type(bank[0]))
    cash = initial_wealth
    kappa = None
    while(t+trade<len(ts_1)):
        train_ts_1 = ts_1[t-train:t]
        train_ts_2 = ts_2[t-train:t]
        LR_model = LinearRegression().fit(train_ts_2.reshape(-1,1),train_ts_1)
        beta = LR_model.coef_
        train_res= train_ts_1 - (beta)*train_ts_2
        model = statsmodels.tsa.api.ARMA(train_res,order=(1,0)).fit(disp=False)
        a, b = model.params
        xi = model.resid 
        #kappa = (-b+1)/delta
        #mean = a/(kappa*delta)
        #sigmaeq = np.sqrt(np.var(xi)/delta)
        previous_kappa = kappa 
        if b>0:
            kappa= -np.log(b)/delta
        else: 
            if previous_kappa is None:
                kappa = 10e5
        #print(kappa)
        mean = a/(1-b)
        sigmasq = np.var(xi)*2*kappa/(1-b**2) 
        sigmaeq = np.sqrt(sigmasq/(2*kappa))
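        # Note: the entry/exit thresholds ssen, ssex, slex and slen used below are
        # not defined in this snippet; they appear to be module-level constants
        # (short-entry, short-exit, long-exit and long-entry signal levels).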
        for i in range(trade):
            if t+i>train:
                q_stock_1[t+i-train] = q_stock_1[t+i-1-train]
                q_stock_2[t+i-train] = q_stock_2[t+i-1-train]
                cash = bank[t+i-1-train]*((1+interest/252)**(1/252))
            signal = ((ts_1[t+i]-(beta)*ts_2[t+i])-mean)/sigmaeq
            if signal > ssen:
                if q_stock_1[t+i-train] ==0:
                    q_stock_1[t+i-train] -= 1
                    q_stock_2[t+i-train] += beta
                    cash = cash + price_1[t+i] - beta*price_2[t+i]
            elif (signal <ssex and signal > slex):
                cash = cash + q_stock_1[t+i-train]*price_1[t+i] + q_stock_2[t+i-train]*price_2[t+i]
                q_stock_1[t+i-train] = 0
                q_stock_2[t+i-train] = 0 
            elif (signal < slen): 
                if q_stock_1[t+i-train] == 0:
                    q_stock_1[t+i-train] += 1
                    q_stock_2[t+i-train] -= beta
                    cash = cash - price_1[t+i] + beta*price_2[t+i]
            bank[t+i-train] = cash 
            wealth[t+i-train] = cash+ q_stock_1[t+i-train]*price_1[t+i]+q_stock_2[t+i-train]*price_2[t+i]
        t=t+trade
    return wealth, q_stock_1, train_res
Example #53
0
    def compute_entropy(self, x):
        H = 0
        for i in range(len(x)):
            H += (x[i] * np.log(x[i]))
        return H
Example #54
0
    #plt.show()

    # fetch scattering kernel
    sigma = getKernel(__screenfile__)

    # sample positions
    t1 = np.tile(np.arange(m.nx), m.nx)
    t0 = np.repeat(np.arange(m.nx), m.nx, axis=0)
    t = np.hstack([t0[:, np.newaxis], t1[:, np.newaxis]])

    # fit
    w = m.source.sum() / np.sum(noise**2 * m.source)

    tic = time.time()
    #initial = np.array([np.log(1./w),np.log(0.5),ftot, fwhm])
    initial = np.array([np.log(1. / w), np.log(2), ftot, fwhm])
    res = minimize(lnprob,initial,\
       args=(m,mNoisy,w,sigma,t),\
       method='Nelder-Mead',\
       options={'disp':True,'maxiter':int(1000)})
    print('optimization took %0.2fs' % (time.time() - tic))

    print('result:', res.x)

    #tic = time.time()
    #args = (m,mNoisy,w,sigma,t)
    #f = lnprob(initial,*args)
    #print '1 exec took %0.2f' % (time.time() - tic)

    # best fit noise model
    #p = initial
Example #55
0
    def interpolate(self, S_0, S_1, S_2, interpolation='gaussian'):
        """
    Use interpolation to refine an FFT frequency estimate.

    .. image:: /_static/interpolation_diagram.png
      :align: center
      :alt: Interpolation diagram

    For an FFT bin spacing of :math:`\delta f`, the input frequency is
    estimated as:

    .. math:: f_{in} \\approx \delta f (k + \Delta)

    Where :math:`k` is the FFT bin with the maximum magnitude and
    :math:`\Delta \in [-\\frac{1}{2}, \\frac{1}{2}]` is a correction found by
    interpolation.

    **Parabolic interpolation:**

    .. math:: \Delta = \\frac{1}{2} \\frac{S[k+1] - S[k-1]}{2S[k] - S[k-1] - S[k+1]}

    Where :math:`S[n]` is the magnitude of FFT bin :math:`n`.

    **Gaussian interpolation:**

    .. math:: \Delta = \\frac{1}{2} \\frac{\ln(S[k+1]) - \ln(S[k-1])}{2\ln(S[k]) - \ln(S[k-1]) - \ln(S[k+1])}

    The Gaussian interpolation method gives better results, especially when
    used with a Gaussian window function, at the expense of computational
    complexity. See [1]_ for detailed comparison.


    Parameters
    ----------
    S_0 : float
      :math:`S[k-1]`, i.e. the magnitude of FFT bin one before the maxima.
    S_1 : float
      :math:`S[k]` i.e. the magnitude of the maximum FFT.
    S_2 : float
      :math:`S[k+1]`, i.e. the magnitude of FFT bin one after the maxima.

    Returns
    -------
    out : float
      The fractional number of FFT bins :math:`\Delta` that the interpolated
      maximum is from the maximum point :math:`S[k]`.

    References
    ----------

    .. [1] Gasior, M. et al., "Improving FFT frequency measurement resolution
       by parabolic and Gaussian spectrum interpolation" AIP Conf.Proc. 732
       (2004) 276-285 `CERN-AB-2004-023-BDI
       <http://cdsweb.cern.ch/record/738182>`_

    """
        if interpolation == 'parabolic':
            # Parabolic interpolation.
            return 0.5 * (S_2 - S_0) / (2 * S_1 - S_0 - S_2)
        elif interpolation == 'gaussian':
            # Gaussian interpolation.
            ln_S_0 = np.log(S_0)
            ln_S_1 = np.log(S_1)
            ln_S_2 = np.log(S_2)
            return 0.5 * (ln_S_2 - ln_S_0) / (2 * ln_S_1 - ln_S_0 - ln_S_2)
        elif interpolation == 'none':
            return 0
        else:
            raise ValueError("Unknown interpolation mode '%s'", interpolation)
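A small self-contained check of the Gaussian interpolation formula (independent of the class above): for bin magnitudes sampled from an ideal Gaussian peak, the formula recovers the true fractional offset exactly. All numbers below are made up for illustration.

import numpy as np

def gaussian_delta(S_0, S_1, S_2):
    ln_0, ln_1, ln_2 = np.log(S_0), np.log(S_1), np.log(S_2)
    return 0.5 * (ln_2 - ln_0) / (2 * ln_1 - ln_0 - ln_2)

true_delta = 0.3      # peak sits 0.3 bins above bin k
sigma_bins = 1.5      # Gaussian peak width in bins
offsets = np.array([-1.0, 0.0, 1.0])          # bins k-1, k, k+1
mags = np.exp(-(offsets - true_delta)**2 / (2 * sigma_bins**2))
print(gaussian_delta(*mags))                  # -> 0.3 (up to rounding)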
Example #56
0
# ### Score features
# 
# There are several ways to score features:
#  - Compute the number of samples in the actual importances that fall outside the recorded null-importances distribution.
#  - Compute ratios like Actual / Null Max, Actual / Null Mean, Actual Mean / Null Max
#  
# As a first step I will use the log of the actual feature importance divided by the 75th percentile of the null distribution.

# In[13]:


feature_scores = []
for _f in actual_imp_df['feature'].unique():
    f_null_imps_gain = null_imp_df.loc[null_imp_df['feature'] == _f, 'importance_gain'].values
    f_act_imps_gain = actual_imp_df.loc[actual_imp_df['feature'] == _f, 'importance_gain'].mean()
    gain_score = np.log(1e-10 + f_act_imps_gain / (1 + np.percentile(f_null_imps_gain, 75)))  # Avoid divide by zero
    f_null_imps_split = null_imp_df.loc[null_imp_df['feature'] == _f, 'importance_split'].values
    f_act_imps_split = actual_imp_df.loc[actual_imp_df['feature'] == _f, 'importance_split'].mean()
    split_score = np.log(1e-10 + f_act_imps_split / (1 + np.percentile(f_null_imps_split, 75)))  # Avoid divide by zero
    feature_scores.append((_f, split_score, gain_score))

scores_df = pd.DataFrame(feature_scores, columns=['feature', 'split_score', 'gain_score'])

plt.figure(figsize=(16, 16))
gs = gridspec.GridSpec(1, 2)
# Plot Split importances
ax = plt.subplot(gs[0, 0])
sns.barplot(x='split_score', y='feature', data=scores_df.sort_values('split_score', ascending=False).iloc[0:70], ax=ax)
ax.set_title('Feature scores wrt split importances', fontweight='bold', fontsize=14)
# Plot Gain importances
ax = plt.subplot(gs[0, 1])
Example #57
0

fig = plt.figure(figsize=(width, height))

xlabels = size**2

plt.loglog(xlabels, timing_LowFreq, label='Solve', color='b', linewidth=2, linestyle='--', marker='.', markersize=8.0, zorder=2)
#plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))


# plt.plt.loglog(xlabels, timing_LowFreq, label='LowFrequency', color='b', linewidth=2, linestyle='--', marker='.', markersize=8.0, zorder=2)
plt.loglog(size**2, timing_LowFact, label='Setup', color='g', linewidth=2, linestyle='--', marker='o', markersize=8.0, zorder=2)

# plt.loglog(size**2, timing_gauss, label='Gaussian bumps', color='g', linewidth=2, linestyle='--', marker='.', markersize=8.0, zorder=2)

plt.loglog(xlabels, (xlabels*np.log(xlabels)**4/(xlabels[0]*np.log(xlabels[0])**4))*timing_LowFreq[0]*1.05, label=r'$\mathcal{O}(N \log^3{N})$', color='k', linewidth=2, linestyle='solid', markersize=8.0, zorder=2)
# #plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
plt.loglog(xlabels, (xlabels*np.log(xlabels)/(xlabels[0]*np.log(xlabels[0])))*timing_LowFact[0]*1.05, label=r'$\mathcal{O}(N \log{N})$', color='r', linewidth=2, linestyle='solid', markersize=8.0, zorder=2)

# # plt.loglog(N_x**2, N_x**2 / 4.0e4, label=r' ', color='white', linewidth=0.0)

plt.legend(loc=2, ncol=1, frameon=False, fontsize=14.85)

# plt.title('Normalized run-time for inner loop')

plt.xlabel(r'$N=n^2$', fontsize=18)
plt.ylabel('Time [s]', fontsize=18)

plt.gca().tick_params(labelsize=14)

plt.autoscale(True, 'both', True)
Example #58
0
    def _log_sigmoid(self, x):
        return np.log(self._sigmoid(x))
Example #59
0
def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops)

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)
    N = X.shape[0]
    C = W.shape[1]
    y_pred = X.dot(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using explicit loops.     #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    for i in range(N):
        current_scores = y_pred[i, :]

        # Fix for numerical stability by subtracting max from score vector.
        shift_scores = current_scores - np.max(current_scores)

        # Calculate loss for this example.
        loss_ii = -shift_scores[y[i]] + np.log(np.sum(np.exp(shift_scores)))
        loss += loss_ii

        for j in range(C):
            softmax_score = np.exp(shift_scores[j]) / np.sum(
                np.exp(shift_scores))

            # Gradient calculation.
            if j == y[i]:
                dW[:, j] += (-1 + softmax_score) * X[i]
            else:
                dW[:, j] += softmax_score * X[i]

    # Average over the batch and add our regularization term.
    loss /= N
    loss += reg * np.sum(W * W)

    # Average over the batch and add derivative of regularization term.
    dW /= N
    dW += 2 * reg * W

    #############################################################################
    #                          END OF YOUR CODE                                 #
    #############################################################################

    return loss, dW
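A hypothetical smoke test (not part of the original example): with near-zero weights the softmax is essentially uniform over the C classes, so the unregularized loss should be close to log(C).

import numpy as np

rng = np.random.default_rng(0)
D, C, N = 10, 4, 50
W = 0.0001 * rng.standard_normal((D, C))
X = rng.standard_normal((N, D))
y = rng.integers(0, C, size=N)

loss, _ = softmax_loss_naive(W, X, y, reg=0.0)
print(loss, np.log(C))   # the two values should nearly agree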
Example #60
0
    def identify(self, genome, actual_node, segment_detector):
        id_map = self._population.id_mapping
        length_classifier = self._length_classifier
        # TODO: Eliminated shared_list and use shared_dict everywhere
        shared_list = []
        anchors = set(length_classifier._labeled_nodes) - self.exclude_anchors
        sorted_labeled = sorted(anchors)
        np_sorted_labeled = np.array(sorted_labeled, dtype=np.uint32)
        sorted_shared = []
        for labeled_node_id in sorted_labeled:
            labeled_node = id_map[labeled_node_id]
            s = segment_detector.shared_segment_length(
                genome, labeled_node.suspected_genome)
            shared_list.append((labeled_node_id, s))
            sorted_shared.append(s)

        write_log("positive ibd count", sum(0.0 < x for x in sorted_shared))
        #write_log("shared", sorted_shared)
        shared_dict = dict(shared_list)
        sorted_shared = np.array(sorted_shared, dtype=np.float64)

        labeled_nodes_cryptic, all_lengths = list(zip(*shared_dict.items()))
        np_cryptic = np.log(
            length_classifier.get_batch_smoothing_gamma(sorted_shared))

        node_data = []
        batch_shape = []
        batch_scale = []
        batch_zero_prob = []
        batch_lengths = []
        # Keep for logging purposes
        # batch_cryptic_lengths = []
        nodes = self._to_search(shared_list, actual_node.sex)
        if len(nodes) == 0:
            # We have no idea which node it is
            return RawIdentified(set(), float("-inf"), None)

        for node in nodes:
            node_start_i = len(batch_shape)
            node_id = node._id
            #node_cryptic_log_probs[node] = 0

            if node_id in length_classifier._distributions:
                labeled_ids, shape, scale, zero_prob = length_classifier._distributions[
                    node_id]
            else:
                labeled_ids = np.array([], dtype=np.uint32)
                shape = scale = zero_prob = np.array([], dtype=np.float64)
            calc_data = calculate_probabilities(labeled_ids, shape, scale,
                                                zero_prob, sorted_shared,
                                                np_sorted_labeled, np_cryptic,
                                                node_id)
            cur_lengths, cur_shapes, cur_scales, cur_zero_prob, cur_cryptic = calc_data
            batch_lengths.extend(cur_lengths)
            batch_shape.extend(cur_shapes)
            batch_scale.extend(cur_scales)
            batch_zero_prob.extend(cur_zero_prob)

            node_stop_i = len(batch_shape)
            node_data.append(
                ProbabilityData(node, node_start_i, node_stop_i, cur_cryptic))

        assert len(node_data) > 0
        if len(batch_lengths) > 0:
            pdf_vals = length_classifier.batch_pdf_distributions(
                batch_lengths, batch_shape, batch_scale, batch_zero_prob)
            calc_prob, zero_replace = pdf_vals
        else:
            calc_prob = []

        log_calc_prob_cum = np.cumsum(np.log(calc_prob))
        del calc_prob
        log_calc_prob_cum = np.concatenate(([0.0], log_calc_prob_cum))
        node_probabilities = dict()
        for node, start_i, stop_i, cryptic_prob in node_data:
            log_prob = (log_calc_prob_cum[stop_i] -
                        log_calc_prob_cum[start_i]) + cryptic_prob
            node_probabilities[node] = log_prob
        assert len(node_probabilities) > 0
        if self.probability_logging:
            write_log(
                "identify", {
                    "node": actual_node._id,
                    "probs": {
                        node._id: prob
                        for node, prob in node_probabilities.items()
                    }
                })

        if len(node_probabilities) == 0:
            return RawIdentified(set(), -INF, None)
        # The value 8 is somewhat arbitrary. We are always able to
        # generate our confidence value with the top 8, as sibships
        # tend to be small. This number may need to be larger for
        # populations with large sibships.
        potential_nodes = nlargest(8,
                                   node_probabilities.items(),
                                   key=lambda x: x[1])
        top, top_log_prob = potential_nodes[0]
        sibling_group = get_suspected_sibling_group(top)
        for node, log_prob in potential_nodes[1:]:
            if node in sibling_group:
                continue
            next_node = node
            next_log_prob = log_prob
            break
        else:
            if len(potential_nodes) > 1:
                next_node, next_log_prob = potential_nodes[1]

        if len(potential_nodes) > 1:
            log_ratio = top_log_prob - next_log_prob
        else:
            log_ratio = -INF
        return RawIdentified(get_sibling_group(top), log_ratio, top)