Example #1
def test_decimate():
    """Test decimation of digitizer headshapes with too many points."""
    # load a spherical headshape scaled to mm, plus a copy in meters
    hsp_mm = _get_ico_surface(5)['rr'] * 100
    hsp_m = hsp_mm / 1000.

    # save headshape to a file in mm in temporary directory
    tempdir = _TempDir()
    sphere_hsp_path = op.join(tempdir, 'test_sphere.txt')
    np.savetxt(sphere_hsp_path, hsp_mm)

    # read in raw data using spherical hsp, and extract new hsp
    with warnings.catch_warnings(record=True) as w:
        raw = read_raw_kit(sqd_path, mrk_path, elp_txt_path, sphere_hsp_path)
    assert_true(any('more than' in str(ww.message) for ww in w))
    # collect headshape from raw (should now be in m)
    hsp_dec = np.array([dig['r'] for dig in raw.info['dig']])[8:]

    # with 10242 input points and _decimate_points at its 5 mm resolution,
    # hsp_dec should be a bit over 5000 points; if not, something is wrong,
    # or the decimation resolution has been deliberately changed
    assert_true(len(hsp_dec) > 5000)

    # should have similar size, distance from center
    dist = np.sqrt(np.sum((hsp_m - np.mean(hsp_m, axis=0))**2, axis=1))
    dist_dec = np.sqrt(np.sum((hsp_dec - np.mean(hsp_dec, axis=0))**2, axis=1))
    hsp_rad = np.mean(dist)
    hsp_dec_rad = np.mean(dist_dec)
    assert_almost_equal(hsp_rad, hsp_dec_rad, places=3)
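The decimation exercised here is essentially uniform-grid thinning: snap points to a coarse grid and keep one point per occupied cell. A minimal standalone sketch of that idea (the 5 mm grid and the keep-first rule are assumptions, not MNE's exact implementation):

import numpy as np

def decimate_points_sketch(pts, res=0.005):
    """Keep roughly one point per res-sized grid cell (pts in meters)."""
    # integer cell index for every point
    cells = np.round(pts / res).astype(int)
    # keep the first point seen in each occupied cell
    _, first = np.unique(cells, axis=0, return_index=True)
    return pts[np.sort(first)]

rng = np.random.RandomState(0)
sphere = rng.randn(10242, 3)
sphere /= np.linalg.norm(sphere, axis=1, keepdims=True)  # unit sphere
dec = decimate_points_sketch(sphere * 0.1, res=0.005)    # 10 cm radius head
print(len(dec))  # far fewer than 10242; mean radius is preserved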
Example #2
def _lmvnpdffull(obs, means, covars):
    """
    Log probability for full covariance matrices.

    WARNING: In certain cases, this function will modify in-place
    some of the covariance matrices

    """
    from scipy import linalg
    if hasattr(linalg, 'solve_triangular'):
        # only in scipy since 0.9
        solve_triangular = linalg.solve_triangular
    else:
        # slower, but works
        solve_triangular = linalg.solve
    n_obs, n_dim = obs.shape
    nmix = len(means)
    log_prob = np.empty((n_obs, nmix))
    for c, (mu, cv) in enumerate(zip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            # The model is most probably stuck in a component with too
            # few observations; reinitialize this component.
            cv[:] = 10 * np.eye(cv.shape[0])
            cv_chol = cv
        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = solve_triangular(cv_chol, (obs - mu).T, lower=True).T
        log_prob[:, c] = -.5 * (np.sum(cv_sol ** 2, axis=1) +
                                n_dim * np.log(2 * np.pi) + cv_log_det)

    return log_prob
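A quick sanity check of the function above: for a single 1-D component the result should match the familiar Gaussian log-density (sketch assumes `import numpy as np`):

import numpy as np
from scipy import stats

obs = np.array([[0.0], [1.0], [2.0]])
means = np.array([[0.0]])
covars = np.array([[[1.0]]])
lp = _lmvnpdffull(obs, means, covars)
print(np.allclose(lp[:, 0], stats.norm(0, 1).logpdf(obs[:, 0])))  # True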
Example #3
 def measure_objects(self, operand, workspace):
     '''Performs the measurements on the requested objects'''
     objects = workspace.get_objects(operand.operand_objects.value)
     if objects.has_parent_image:
         area_occupied = np.sum(objects.segmented[objects.parent_image.mask]>0)
         perimeter = np.sum(outline(np.logical_and(objects.segmented != 0,objects.parent_image.mask)))
         total_area = np.sum(objects.parent_image.mask)
     else:
         area_occupied = np.sum(objects.segmented > 0)
         perimeter = np.sum(outline(objects.segmented) > 0)
         total_area = np.prod(objects.segmented.shape)
     m = workspace.measurements
     m.add_image_measurement(F_AREA_OCCUPIED%(operand.operand_objects.value),
                             np.array([area_occupied], dtype=float ))
     m.add_image_measurement(F_PERIMETER%(operand.operand_objects.value),
                             np.array([perimeter], dtype=float ))
     m.add_image_measurement(F_TOTAL_AREA%(operand.operand_objects.value),
                             np.array([total_area], dtype=float))
     if operand.should_save_image.value:
         binary_pixels = objects.segmented > 0
         output_image = cpi.Image(binary_pixels,
                                  parent_image = objects.parent_image)
         workspace.image_set.add(operand.image_name.value,
                                 output_image)
     return [[operand.operand_objects.value,
             str(area_occupied), str(perimeter), str(total_area)]]
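Outside CellProfiler, the same three measurements can be sketched with plain NumPy/SciPy on a toy label image; the erosion-based outline below only approximates CellProfiler's `outline`:

import numpy as np
from scipy.ndimage import binary_erosion

labels = np.zeros((8, 8), dtype=int)
labels[2:6, 2:6] = 1                      # one 4x4 object

binary = labels > 0
area_occupied = np.sum(binary)            # 16
outline = binary & ~binary_erosion(binary)
perimeter = np.sum(outline)               # boundary pixel count: 12
total_area = np.prod(labels.shape)        # 64
print(area_occupied, perimeter, total_area)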
Example #4
def torgerson(distances, n_components=2):
    """
    Perform classical mds (Torgerson scaling).

    .. note::
        If the distances are Euclidean then this is equivalent to projecting
        the original data points onto the first `n_components` principal components.

    """
    distances = np.asarray(distances)
    assert distances.shape[0] == distances.shape[1]
    N = distances.shape[0]
    # squared distances
    D_sq = distances ** 2

    # double center the D_sq
    rsum = np.sum(D_sq, axis=1, keepdims=True)
    csum = np.sum(D_sq, axis=0, keepdims=True)
    total = np.sum(csum)
    D_sq -= rsum / N
    D_sq -= csum / N
    D_sq += total / (N ** 2)
    B = np.multiply(D_sq, -0.5, out=D_sq)

    U, L, _ = np.linalg.svd(B)
    if n_components > N:
        U = np.hstack((U, np.zeros((N, n_components - N))))
        L = np.hstack((L, np.zeros((n_components - N))))
    U = U[:, :n_components]
    L = L[:n_components]
    D = np.diag(np.sqrt(L))
    return np.dot(U, D)
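A small check: embed a Euclidean distance matrix and confirm the embedded points reproduce it (classical MDS is exact in this case):

import numpy as np
from scipy.spatial.distance import squareform, pdist

rng = np.random.RandomState(0)
points = rng.rand(10, 2)
D = squareform(pdist(points))       # Euclidean distance matrix
emb = torgerson(D, n_components=2)
D_emb = squareform(pdist(emb))
print(np.allclose(D, D_emb))        # True: pairwise distances preserved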
Example #5
def sum_to_shape(X, s):
    """
    Sum over axes of the array such that the resulting shape is as given.

    The shape of the result will be s, or an error is raised.
    """
    # First, sum and remove axes that are not in s
    if np.ndim(X) > len(s):
        axes = tuple(range(-np.ndim(X), -len(s)))
    else:
        axes = ()
    Y = np.sum(X, axis=axes)

    # Second, sum axes that are 1 in s but keep the axes
    axes = ()
    for i in range(-np.ndim(Y), 0):
        if s[i] == 1:
            if np.shape(Y)[i] > 1:
                axes = axes + (i,)
        else:
            if np.shape(Y)[i] != s[i]:
                raise ValueError("Shape %s can't be summed to shape %s" %
                                 (np.shape(X), s))
    Y = np.sum(Y, axis=axes, keepdims=True)
    
    return Y
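For example, summing a (3, 4, 5) array down to shape (4, 1) removes the leading axis and collapses the trailing one while keeping it:

import numpy as np

X = np.arange(60).reshape(3, 4, 5)
Y = sum_to_shape(X, (4, 1))
print(Y.shape)                                   # (4, 1)
print(np.allclose(Y[:, 0], X.sum(axis=(0, 2))))  # True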
Example #6
def dice_(seg, gt):
    """Soft Dice similarity between a segmentation and a ground-truth mask."""
    intersection = 2. * np.sum(seg * gt)
    denominator = (np.sum(np.square(seg)) + np.sum(np.square(gt)))
    if denominator == 0:
        return 1.
    similarity = intersection / denominator
    return similarity
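A quick example on two overlapping binary masks:

import numpy as np

seg = np.array([0, 1, 1, 1, 0], dtype=float)
gt = np.array([0, 0, 1, 1, 1], dtype=float)
# 2 * sum(seg * gt) / (sum(seg^2) + sum(gt^2)) = 2*2 / (3 + 3)
print(dice_(seg, gt))  # 0.666...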
Example #7
 def dist_tree(self, other, tol=1e-8):
     if self.ndim == 2:
         from . import d2 as sim
     else:
         from . import d3 as sim
         
     assert self.ndim == other.ndim
     
     if self.gamma != 0 or other.gamma != 0:
         assert np.abs(self.gamma - other.gamma) <= tol
         gamma = (self.gamma + other.gamma) / 2.0
         box = sim.LeesEdwardsBox(sim.Vec(1,1), gamma)
     else:
         box = sim.OriginBox(1.0)
     
     sz1 = self.size_indices()
     assert(len(sz1) == 2)
     cutoff1 = int(np.sum(sz1[0]))
     
     sz2 = other.size_indices()
     assert(len(sz2) == 2)
     cutoff2 = int(np.sum(sz2[0]))
     
     vs1 = [sim.Vec(*xy) for idx in sz1 for xy in self.rs[idx]]
     vs2 = [sim.Vec(*xy) for idx in sz2 for xy in other.rs[idx]]
     
     tree = sim.JammingTreeBD(box, sim.vecvector(vs1), sim.vecvector(vs2), cutoff1, cutoff2)
     return tree
Example #8
    def compute_size(fonts, layout):
        """Compute width + height for the layout + line boxes + text boxes."""
        for line in layout.children:
            for box in line.children:
                if isinstance(box, TextBox):
                    font = fonts.font(box.style)
                    box.width = font.width(box.text)

                    # Box baseline is the relative offset from the line baseline in canvas coordinates
                    alignment_baseline = box.style["alignment-baseline"]
                    if alignment_baseline == "alphabetic":
                        box.baseline = 0
                    elif alignment_baseline == "central":
                        box.baseline = font.ascent * 0.5
                    elif alignment_baseline == "hanging":
                        box.baseline = font.ascent
                    elif alignment_baseline == "middle":
                        box.baseline = font.ascent * 0.35
                    else:
                        raise ValueError("Unknown alignment-baseline value: %s" % alignment_baseline)

                    # Box top is the relative offset from the line baseline in canvas coordinates
                    box.top = box.baseline - font.ascent
                    # Box bottom is the relative offset from the line baseline in canvas coordinates
                    box.bottom = box.baseline - font.descent

                    box.height = box.bottom - box.top
                elif isinstance(box, MarkerBox):
                    font = fonts.font(box.style)

                    box.baseline = 0
                    box.top = box.baseline - font.ascent
                    box.bottom = box.baseline - font.descent
                    box.height = box.bottom - box.top
                    box.width = box.height
                elif isinstance(box, (PushHyperlink, PopHyperlink)):
                    box.baseline = 0
                    box.top = box.baseline
                    box.bottom = box.baseline
                    box.height = box.bottom - box.top
                    box.width = 0
                else:
                    raise Exception("Unexpected box type: %s" % box) # pragma: no cover

            # Line top is the relative offset from the line baseline in canvas coordinates
            line.top = numpy.min([child.top for child in line.children]) if line.children else 0
            # Line bottom is the relative offset from the line baseline in canvas coordinates
            line.bottom = numpy.max([child.bottom for child in line.children]) if line.children else 0

            actual_line_height = line.bottom - line.top
            explicit_line_height = line.style["line-height"]
            offset = (explicit_line_height - actual_line_height) * 0.5
            if offset > 0:
                line.top -= offset
                line.bottom += offset

            line.width = numpy.sum([child.width for child in line.children]) if line.children else 0
            line.height = line.bottom - line.top

        layout.height = numpy.sum([line.height for line in layout.children]) if layout.children else 0
Example #9
def cond_entropy(period, data, p_bins=10, m_bins=5):
	'''
	Compute the conditional entropy for the
	normalized observations.
	'''
	if period <= 0:
		return np.inf
	r = rephase(data, period)
	bins, *_ = np.histogram2d(r[:,0], r[:,1], [p_bins, m_bins],
								[[0,1], [0,1]])
	size = r.shape[0]
	if size > 0:
		divided_bins = bins / size
		arg_positive = divided_bins > 0
		column_sums = np.sum(divided_bins, axis=1)
		column_sums = np.repeat(np.reshape(column_sums,
								(p_bins,1)), m_bins, axis=1)
		
		select_divided_bins = divided_bins[arg_positive]
		select_column_sums  = column_sums[arg_positive]

		A = np.empty((p_bins, m_bins), dtype=float)
		A[ arg_positive] = select_divided_bins \
                         * np.log(select_column_sums \
                         / select_divided_bins)
		A[~arg_positive] = 0

		return np.sum(A)
	else:
		return np.inf
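`rephase` is defined elsewhere in the module; a minimal stand-in that phase-folds normalized (time, magnitude) pairs is enough to exercise the function (the fold-into-[0, 1) behavior is an assumption):

import numpy as np

def rephase(data, period):
    # assumed behavior: fold times modulo the period into [0, 1)
    r = np.array(data, dtype=float)
    r[:, 0] = (r[:, 0] / period) % 1.0
    return r

rng = np.random.RandomState(0)
t = np.sort(rng.rand(200) * 10)
m = 0.5 + 0.4 * np.sin(2 * np.pi * t / 2.5)   # true period 2.5
data = np.column_stack([t, m])
# conditional entropy should dip near the true period
print(cond_entropy(2.5, data) < cond_entropy(1.7, data))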
Example #10
    def fitBrokenToStrips(self):

        """Fits broken-line to strip data"""

        # boolean mask selecting well-populated strips
        bStrip = (self.stripCounts > 10)
        if np.sum(bStrip) < 1:
            return

        x = self.stripFeHs[bStrip]
        y = self.stripMedns[bStrip]  # could come back to this later

        # guess set of parameters
        #guess = [-0.4, -40., -240., 40.]
        guess = [-0.3, 0.0, -250., 0.0]

        # Yes this could all be looped through...

        meritStraight = lambda pars, x, y: oneStraight(x, pars) - y
        meritBroken = lambda pars, x, y: twoStraight(x, pars) - y 

        # do the fitting 
        self.parsStraight, self.successStraight = \
            leastsq(meritStraight, guess[0:2], args=(x, y))

        self.parsBroken, self.successBroken = \
            leastsq(meritBroken, guess[:], args=(x, y))

        # compute the sum of residuals, over the strips (not the
        # stars)
        self.chisqStraight = np.sum(meritStraight(self.parsStraight, x, y)**2)
        self.chisqBroken = np.sum(meritBroken(self.parsBroken, x, y)**2)
Example #11
def decider(pp_matrix, ML_dec, t, R_lim, num_annotators,C):

    pp_matrix2 = np.hstack((pp_matrix, ML_dec.reshape((C, 1)))) #append ML_dec as an extra column of pp_matrix
    v = np.sum(pp_matrix2, axis=1)/np.sum(np.sum(pp_matrix2)) #vector of normalized row sums of pp_matrix2
    maximum = np.amax(v) #max value of v
    maxIdx = np.argmax(v) #index of max value of v

    if maximum >= t[maxIdx]: #if maximum is above threshold for given class, retire image

        decision = 1
        print('Image is retired')

    elif num_annotators >= R_lim: #if at least R_lim annotators have looked at the image and no decision was reached, pass it to more experienced users

        decision = 2
        print('Image is given to the upper class')

    else: #if fewer than R_lim annotators have looked at image, keep image

        decision = 3
        print('More labels are needed for the image')

    image_class = maxIdx

    return decision, image_class
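A toy call with three classes; the posterior matrix, thresholds, and limits below are made up for illustration:

import numpy as np

pp_matrix = np.array([[0.7, 0.6],
                      [0.2, 0.3],
                      [0.1, 0.1]])        # C x annotators posteriors
ML_dec = np.array([0.8, 0.1, 0.1])        # machine classifier posterior
t = np.array([0.4, 0.4, 0.4])             # per-class retirement thresholds
decision, image_class = decider(pp_matrix, ML_dec, t, R_lim=10,
                                num_annotators=2, C=3)
print(decision, image_class)              # 1 0 -> retired as class 0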
Example #12
 def _LMLgrad_beta(self, hyperparams):
     LATildeInfTP = numpy.dot(self._LAtildeInv.T, self._P)
     gBeta = .5 * (self.d * (numpy.trace(self._C) + (self.n - self.m) * self._beta_inv - self._psi_0)
                   - self.TrYY + self._TrPP
                   + self._beta_inv ** 2 * self.d * numpy.sum(self._LAtildeInv * self._LAtildeInv)
                   + self._beta_inv * numpy.sum(LATildeInfTP ** 2))
     return -gBeta  # negative because gradient is w.r.t loglikelihood
Example #13
def std(f):
    x = np.arange(len(f), dtype=float)
    # normalize; we do not prefer attributes with many values
    x = x / x.mean()
    xf = np.multiply(f, x)
    x2f = np.multiply(f, np.power(x, 2))
    return np.sqrt((np.sum(x2f) - np.power(np.sum(xf), 2) / np.sum(f)) / (np.sum(f) - 1))
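For a frequency vector f over equally spaced values, this is the frequency-weighted sample standard deviation of the mean-normalized positions:

import numpy as np

f = np.array([1, 4, 6, 4, 1], dtype=float)  # symmetric histogram
print(std(f))  # ~0.516: spread of rescaled positions 0..4, weighted by f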
Example #14
    def _beam_map_single(self, bl_index, f_index):

        p_stokes = [ 0.5 * np.array([[1.0,   0.0], [0.0,  1.0]]),
                     0.5 * np.array([[1.0,   0.0], [0.0, -1.0]]),
                     0.5 * np.array([[0.0,   1.0], [1.0,  0.0]]),
                     0.5 * np.array([[0.0, -1.0J], [1.0J, 0.0]]) ]

        # Get beam maps for each feed.
        feedi, feedj = self.uniquepairs[bl_index]
        beami, beamj = self.beam(feedi, f_index), self.beam(feedj, f_index)

        # Get baseline separation and fringe map.
        uv = self.baselines[bl_index] / self.wavelengths[f_index]
        fringe = visibility.fringe(self._angpos, self.zenith, uv)

        pow_stokes = [ np.sum(beami * np.dot(beamj.conjugate(), polproj), axis=1) * self._horizon for polproj in p_stokes]

        # Calculate the solid angle of each beam
        pxarea = (4*np.pi / beami.shape[0])

        om_i = np.sum(np.abs(beami)**2 * self._horizon[:, np.newaxis]) * pxarea
        om_j = np.sum(np.abs(beamj)**2 * self._horizon[:, np.newaxis]) * pxarea

        omega_A = (om_i * om_j)**0.5

        # Calculate the complex visibility transfer function
        cv_stokes = [ p * (2 * fringe / omega_A) for p in pow_stokes ]

        return cv_stokes
Example #15
def get_num_diffs(cummulativeFrames):
  shape = cummulativeFrames.shape
  xMax = shape[0]
  yMax = shape[1]

  numBlocks = (xMax * yMax / PIXINCR)
  print(numBlocks)
  totalDiffs = np.sum(cummulativeFrames)
  totalDiffsAvg = totalDiffs / numBlocks
  print(totalDiffsAvg)

  xPos = 0 
  yPos = 0

  allDiffs = []

  #print "Starting diffs.."
  diff_count = 0
  while yPos < yMax:
    xPos = 0
    while xPos < xMax:
      chunk = cummulativeFrames[xPos:xPos+PIXINCR, yPos:yPos+PIXINCR]
      diffs = np.sum(chunk)
      allDiffs.append(diffs)
      xPos += PIXINCR

    yPos += PIXINCR

  # count blocks whose summed difference exceeds the per-block average
  for diff in allDiffs:
    if diff > totalDiffsAvg:
      diff_count += 1
  return diff_count
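`PIXINCR` is a module-level block size defined elsewhere; a quick exercise of the function under that assumption:

import numpy as np

PIXINCR = 4  # assumed block size; defined elsewhere in the original module
frames = np.zeros((8, 8))
frames[:4, :4] = 5.0          # one block much busier than the rest
print(get_num_diffs(frames))  # 1: only the busy block exceeds the average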
Example #16
def findrotation_kabsch(coords1, coords2, align_com=True):
    '''
    Kabsch, Wolfgang, (1976) "A solution of the best rotation to relate two sets of vectors", Acta Crystallographica 32:922
    '''
    
    # check if arrays are of same size
    if(coords1.size != coords2.size):
        raise BaseException("dimension of arrays does not match")
    
    # reshape the arrays
    x1 = coords1.reshape([-1,3])
    x2 = coords2.reshape([-1,3])
    
    # determine number of atoms
    natoms = x1.shape[0]
    
    # set both com to zero
    if(align_com):
        com1 = np.sum(x1,axis=0) / float(natoms)
        com2 = np.sum(x2,axis=0) / float(natoms)
        x1 -= com1
        x2 -= com2
  
    # calculate covariance matrix
    A = np.dot( x2.transpose(), x1)
    # and do singular value decomposition
    u, s, v = np.linalg.svd(A)
 
    if np.linalg.det(u) * np.linalg.det(v) + 1.0 < 1e-8:
        s[-1] = -s[-1]
        u[:,-1] = -u[:,-1]
 
    return  np.dot(u, v).transpose()
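A round-trip check: rotate a random point cloud by a known matrix and recover it (copies are passed because the function centers its inputs in place):

import numpy as np

rng = np.random.RandomState(1)
p = rng.rand(10, 3)
theta = 0.3
R_true = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                   [np.sin(theta),  np.cos(theta), 0.0],
                   [0.0, 0.0, 1.0]])
q = p.dot(R_true.T)                              # rotate each point by R_true
R_est = findrotation_kabsch(p.copy(), q.copy())  # returns the transpose convention
print(np.allclose(R_est.T, R_true))              # True: rotation recovered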
Example #17
    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train)) 
        
        # l2(A, X) = l2([a,b], [x,y]) = ...
        #     sqrt( (a-x)^2 + (b-y)^2 ) = ...
        #     = sqrt( a^2 -2ax + x^2 + b^2 -2by + y^2 ) = 
        #     = sqrt( (a^2 + b^2) + (x^2 + y^2) -2(ax + by) ) = ...
        #     = sqrt( A^2 - 2 * A.dot(X.T) + X^2 ) =
        #     = np.sqrt( np.sum(A ** 2) + np.sum(X ** 2) -2 * A.dot(X.T) )
        
        # -2 * X.dot(X_train.T)
        res = -2 * X.dot(self.X_train.T)
        # add the squared norms of test and train rows (broadcast)
        res = res + np.sum(X ** 2, axis=1).reshape((-1, 1)) + np.sum(self.X_train ** 2, axis=1)
        dists = np.sqrt(res)
        
        return dists
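The vectorized identity itself is easy to validate against SciPy, independent of the surrounding class:

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.RandomState(0)
X_train = rng.rand(5, 3)
X = rng.rand(4, 3)
res = -2 * X.dot(X_train.T)
res += np.sum(X ** 2, axis=1).reshape(-1, 1) + np.sum(X_train ** 2, axis=1)
print(np.allclose(np.sqrt(res), cdist(X, X_train)))  # True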
Example #18
	def pick_parents(self):

		# compute fitness
		w = self.calc_fitness()
		# sample parents proportional to their fitness
		males = np.array(np.nonzero(self.sex == 0)).flatten()
		females = np.array(np.nonzero(self.sex == 1)).flatten()
		w_male = w[males]/np.sum(w[males])
		w_female = w[females]/np.sum(w[females])
		males = np.random.choice(males, size = self.size // 2, p = w_male)
		females = np.random.choice(females, size = self.size // 2, p = w_female)
		assert(len(males) == len(females))

		idx = []
		sexes = []
		for i in range(0, len(males)):
			idx.append(males[i])
			idx.append(females[i])
			sexes.append(0)
			sexes.append(1)

		# make a copy of current population
		newpop = copy.deepcopy(self)
		newpop.sex = np.array(sexes)

		# now copy chromosomes from current population to new one
		for j in range(0, len(self.chrlen)):
			for i in range(0, self.size):
				newpop.tr[i] = self.tr[ idx[i] ]
				newpop.chroms[j][2*i] = self.chroms[j][ 2*idx[i] ]
				newpop.chroms[j][2*i+1] = self.chroms[j][ 2*idx[i]+1 ]

		return newpop
Example #19
def svm_loss(x, y):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
    for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
    0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx
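A quick numeric check on random scores, comparing one gradient entry against a centered difference:

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(6, 4)
y = rng.randint(4, size=6)
loss, dx = svm_loss(x, y)

# centered-difference check on one entry
h = 1e-5
xp, xm = x.copy(), x.copy()
xp[0, 1] += h
xm[0, 1] -= h
num = (svm_loss(xp, y)[0] - svm_loss(xm, y)[0]) / (2 * h)
print(loss, np.isclose(num, dx[0, 1]))  # loss value, True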
Example #20
    def __update_b_vec(self,cur_obs):
        # convert measurement vector into emission probabilities
        # repeat the observation in columns
        cur_obs_mat = np.tile(cur_obs,(self.V_mat.shape[1],1)).T
        masked_mat = cur_obs_mat == self.V_mat

        # Extract the probability of the observation on each link for each state
        p_obs_given_off_link = np.sum(self.off_links*masked_mat,axis=1)
        p_obs_given_on_link  = np.sum(self.on_links*masked_mat,axis=1)

        # replicate the probability of each measurement on each link for each state
        p_obs_mat_off = np.tile(p_obs_given_off_link,(self.num_states,1)).T
        p_obs_mat_on  = np.tile(p_obs_given_on_link,(self.num_states,1)).T

        # Compute emission probabilities
        tmp1 = self.codewords*p_obs_mat_on
        tmp2 = np.logical_not(self.codewords)*p_obs_mat_off
        tmp3 = tmp1 + tmp2
        
        # divide tmp3 into groups of 4.  Multiply and normalize
        prev = np.ones(self.num_states)
        start_mark = 0
        end_mark = 4
        group = end_mark
        while start_mark < self.num_links:
            current = np.prod(tmp3[start_mark:np.minimum(self.num_links,end_mark),:],axis=0)
            current = current/np.sum(current)
            prev = (prev*current)/np.sum(prev*current)
            end_mark += group
            start_mark += group

        # add emission probabilities to the circular buffer
        self.C.add_observation(prev)        
Example #21
def log_sum_exp(x, axis=0):
    """ Calculates the logarithm of the sum of e to the power of input 'x'. The method tries to avoid
        overflows by using the relationship: log(sum(exp(x))) = alpha + log(sum(exp(x-alpha))).
        
    :Parameters:
        x:    data.
             -type: float or numpy array 
          
        axis: Sums along the given axis.
             -type: int
        
    :Return:
        Logarithm of the sum of exp of x. 
       -type: float or numpy array.
        
    """
    alpha = x.max(axis) - numx.log(numx.finfo(numx.float64).max)/2.0
    if axis == 1:
        return numx.squeeze(alpha + numx.log(
                                             numx.sum(
                                                      numx.exp(x.T - alpha)
                                                      , axis=0)))
    else:
        return numx.squeeze(alpha + numx.log(
                                             numx.sum(
                                                      numx.exp(x - alpha)
                                                      , axis=0)))
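Assuming `numx` is the module's alias for NumPy, a check against the naive formula, plus a case where the naive formula would overflow:

import numpy as numx

x = numx.array([1.0, 2.0, 3.0])
print(numx.allclose(log_sum_exp(x),
                    numx.log(numx.sum(numx.exp(x)))))  # True

big = numx.array([1000.0, 1000.0])
print(log_sum_exp(big))  # ~1000.693; numx.exp(big) alone would overflow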
Example #22
 def __set_static_gaus_pmfs(self):
     if not self.off_buff.is_full():
         print("The long term buffer is not yet full. This may give undesirable results")
     
     # median RSS of off-state buffer
     cal_med = self.off_buff.get_no_nan_median()
     
     if (np.sum(cal_med == 127) > 0) | (np.sum(np.isnan(cal_med)) > 0):
         sys.stderr.write('At least one link has a median of 127 or is nan\n\n')
         quit()
          
     if (np.sum(np.isnan(self.off_buff.get_nanvar())) > 0):
         sys.stderr.write('the long term buffer has a nan')
         quit()
     
     cal_med_mat = np.tile(cal_med,(self.V_mat.shape[1],1)).T
     
     # variance of RSS during calibration
     cal_var = np.maximum(self.off_buff.get_nanvar(),self.omega) #3.0 
     cal_var_mat = np.tile(cal_var,(self.V_mat.shape[1],1)).T
     
     # Compute the off_link emission probabilities for each link
     x = np.exp(- (self.V_mat - cal_med_mat)**2/(2*cal_var_mat/1.0)) # 1.0
     self.off_links = self.__normalize_pmf(x)
     
     # Compute the on_link emission probabilities for each link
     x = np.exp(- (self.V_mat - (cal_med_mat-self.Delta))**2/(self.eta*2*cal_var_mat)) # 3
     self.on_links = self.__normalize_pmf(x) 
Example #23
    def transform_scalars(self, dataset, N=25):
        """Add Poisson noise to tilt images"""
        self.progress.maximum = 1

        tiltSeries = utils.get_array(dataset).astype(float)
        if tiltSeries is None:
            raise RuntimeError("No scalars found!")

        Ndata = tiltSeries.shape[0] * tiltSeries.shape[1]

        self.progress.maximum = tiltSeries.shape[2]
        step = 0
        for i in range(tiltSeries.shape[2]):
            if self.canceled:
                return

            tiltImage = tiltSeries[:, :, i].copy()
            tiltImage = tiltImage / np.sum(tiltSeries[:, :, i]) * (Ndata * N)
            tiltImage = np.random.poisson(tiltImage)
            tiltImage = tiltImage * np.sum(tiltSeries[:, :, i]) / (Ndata * N)

            tiltSeries[:, :, i] = tiltImage.copy()
            step += 1
            self.progress.value = step

        utils.set_array(dataset, tiltSeries)
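Stripped of the Tomviz plumbing, the per-image recipe is: rescale so the mean pixel carries N counts, draw Poisson samples, rescale back. A standalone sketch:

import numpy as np

def add_poisson_noise(image, N=25):
    """Rescale so the average pixel carries N counts, sample, rescale back."""
    total = np.sum(image)
    scaled = image / total * (image.size * N)
    noisy = np.random.poisson(scaled).astype(float)
    return noisy * total / (image.size * N)

img = np.full((64, 64), 100.0)
print(add_poisson_noise(img).std())  # nonzero: shot noise was added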
Example #24
    def smooth_objective(self, x, mode='both', check_feasibility=False):
        """
        Evaluate a smooth function and/or its gradient

        if mode == 'both', return both function value and gradient
        if mode == 'grad', return only the gradient
        if mode == 'func', return only the function value
        """
        x = self.apply_offset(x)
        exp_x = np.exp(x)

        #TODO: Using transposes to scale the rows of a 2d array - should we use an affine_transform to do this?
        #JT: should be able to do this with np.newaxis

        if mode == 'both':
            ratio = ((self.trials / (1. + np.sum(exp_x, axis=1))) * exp_x.T).T
            f = -2. * self.scale(np.sum(self.firstcounts * x) -
                                 np.dot(self.trials, np.log(1. + np.sum(exp_x, axis=1))))
            g = -2 * self.scale(self.firstcounts - ratio)
            return f, g
        elif mode == 'grad':
            ratio = ((self.trials / (1. + np.sum(exp_x, axis=1))) * exp_x.T).T
            return -2 * self.scale(self.firstcounts - ratio)
        elif mode == 'func':
            return -2. * self.scale(np.sum(self.firstcounts * x) -
                                    np.dot(self.trials, np.log(1. + np.sum(exp_x, axis=1))))
        else:
            raise ValueError("mode incorrectly specified")
Example #25
def chol_logdet(U):
    if isinstance(U, np.ndarray):
        return 2*np.sum(np.log(np.diag(U)))
    elif isinstance(U, cholmod.Factor):
        return np.sum(np.log(U.D()))
    else:
        raise ValueError("Unknown type of Cholesky factor")
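For the dense case, a check against NumPy's slogdet:

import numpy as np

A = np.array([[4.0, 2.0], [2.0, 3.0]])  # symmetric positive definite
U = np.linalg.cholesky(A).T             # upper-triangular factor
print(np.isclose(chol_logdet(U), np.linalg.slogdet(A)[1]))  # True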
Example #26
 def __init__(self, data, classes, tree_features, n_trees=100):
     self.n_features = np.shape(data)[1]
     n_rows = np.shape(data)[0]
     n_nans = np.sum(np.isnan(data), 0)
     data = data[:, n_nans < n_rows]
     self.n_features = np.shape(data)[1]
     
     n_nans = np.sum(np.isnan(data), 1)
     data = data[n_nans < self.n_features, :]
     self.n_rows = np.shape(data)[0]
     
     if (tree_features > self.n_features):
         tree_features = self.n_features
     
     self.col_list = np.zeros((n_trees, tree_features), dtype='int')
     self.n_trees = n_trees
     self.bags = []
     for i in range(n_trees):
         cols = sample(range(self.n_features), tree_features)
         cols.sort()
         self.col_list[i, :] = cols
         data_temp = data[:, cols]
         n_nans = np.sum(np.isnan(data_temp), 1)
         data_temp = data_temp[n_nans == 0, :]
         classes_temp = classes[n_nans == 0]
         #bag = BaggingClassifier(n_estimators=1, max_features=tree_features)
         bag = RandomForestClassifier(n_estimators=1, max_features=tree_features)
         bag.fit(data_temp, classes_temp)
         self.bags.append(bag)
         print(np.shape(data_temp))
Example #27
def test_matrix_assemble(dim):
    eps = 1000*DOLFIN_EPS

    (u, uu), (v, vv), (U, UU), dPP, bc = _create_dp_problem(dim)

    # Scalar assemble
    mat = assemble(u*v*U*dPP)

    # Create a numpy matrix based on the local size of the vector
    # and populate it with values from local vector
    loc_range = u.vector().local_range()
    vec_mat = np.zeros_like(mat.array())
    vec_mat[range(loc_range[1] - loc_range[0]),
            range(loc_range[0], loc_range[1])] = u.vector().get_local()

    assert np.sum(np.absolute(mat.array() - vec_mat)) < eps

    # Vector assemble
    mat = assemble((uu[0]*vv[0]*UU[0] + uu[1]*vv[1]*UU[1])*dPP)

    # Create a numpy matrix based on the local size of the vector
    # and populate it with values from local vector
    loc_range = uu.vector().local_range()
    vec_mat = np.zeros_like(mat.array())
    vec_mat[range(loc_range[1] - loc_range[0]),
            range(loc_range[0], loc_range[1])] = uu.vector().get_local()

    assert np.sum(np.absolute(mat.array() - vec_mat)) < eps
Example #28
def discrepancy(observed, simulated, expected):
    """Calculates Freeman-Tukey statistics (Freeman and Tukey 1950) as
    a measure of discrepancy between observed and r replicates of simulated data. This
    is a convenient method for assessing goodness-of-fit (see Brooks et al. 2000).
    
    D(x|\theta) = \sum_j (\sqrt{x_j} - \sqrt{e_j})^2
    
    :Parameters:
      observed : Iterable of observed values (length n)
      simulated : Iterable of simulated values (length r x n)
      expected : Iterable of expected values (length r x n)
    
    :Returns:
      D_obs : Discrepancy of observed values
      D_sim : Discrepancy of simulated values
    
    """
    try:
        simulated = simulated.astype(float)
    except AttributeError:
        simulated = simulated.trace().astype(float)
    try:
        expected = expected.astype(float)
    except AttributeError:
        expected = expected.trace().astype(float)
    
    D_obs = np.sum([(np.sqrt(observed)-np.sqrt(e))**2 for e in expected], 1)
    D_sim = np.sum([(np.sqrt(s)-np.sqrt(e))**2 for s,e in zip(simulated, expected)], 1)
    
    # Print p-value
    count = sum(s>o for o,s in zip(D_obs,D_sim))
    print_('Bayesian p-value: p=%.3f' % (1.*count/len(D_obs)))
    
    return D_obs, D_sim
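A toy run; `print_` is assumed to be the module's print helper, aliased here so the sketch is self-contained:

import numpy as np

print_ = print  # stand-in for the module's print_ helper used above

observed = np.array([10., 12., 9.])
simulated = np.random.poisson(10, size=(5, 3)).astype(float)
expected = np.full((5, 3), 10.)
D_obs, D_sim = discrepancy(observed, simulated, expected)
print(D_obs.shape, D_sim.shape)  # (5,) (5,)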
Example #29
def f1(u, eigvals, Z10, Z11):
    """
    A component of exact time pdf (Eq. 22, HJC92).

    Parameters
    ----------
    u : float
        u = t - tres
    eigvals : array_like, shape (k,)
        Eigenvalues of -Q matrix.
    Z10, Z11 (or gama10, gama11) : list of array_likes
        Constants for the exact open/shut time pdf. Z10, Z11 for likelihood
        calculation or gama10, gama11 for time distributions.

    Returns
    -------
    f : ndarray
    """

#    f = np.zeros(Z10[0].shape)
#    for i in range(len(eigvals)):
#        f += (Z10[i] + Z11[i] * u) *  math.exp(-eigvals[i] * u)

    if Z10.ndim > 1:
        f = np.sum((Z10 + Z11 * u) *
            np.exp(-eigvals * u).reshape(Z10.shape[0],1,1), axis=0)
    else:
        f = np.sum((Z10 + Z11 * u) * np.exp(-eigvals * u))
    return f
Example #30
def stats(t, snp=None):
    '''Return a record array with imputation statistics.'''
    T = t.sample_index_to_impute
    imputed = t.imputed_data[:, T, :]
    tot_to_impute = 2 * imputed.shape[1]
    snp = snp if snp is not None else np.arange(t.num_snps)
    stats = np.zeros((len(snp),),
                     dtype=[
                            ('dist_cm', 'f4'), # Genetic distance from beginning of chromosome
                            ('count', '(2,)i4'), # Allele count
                            ('frequency', '(2,)f4'), # Allele frequency
                            ('call_rate', 'f4'), # Imputation Call rate
                            ('call_rate_training', 'f4')  # Imputation Call rate
                            ])
    call_rate_training = 1.0 * np.sum(np.sum(t.imputed_data[:, t.sample_index, :] != 0, axis=2), axis=1)# / (2 * len(t.sample_index))        
    for row, snp_index in enumerate(snp):
        # TODO: replace by a bulk group-by/hist?
        # g = t.__t.training_data[snp_index, :, :]
        i = imputed[snp_index, :]
        (c1, c2) = (len(np.where(i == 1)[0]), len(np.where(i == 2)[0]))
        c = c1 + c2 + SMALL_FLOAT
        f1, f2 = (1.0 * c1) / c, (1.0 * c2) / c
        call_rate = 1.0 * len(i.nonzero()[0]) / tot_to_impute
        # print 'c1 %4d c2 %4d f1 %.2f f2 %.2f call rate %5.2f' % (c1, c2, f1, f2, call_rate)
        stats[row] = (t.snp['dist_cm'][snp_index], [c1, c2], [f1, f2], call_rate, call_rate_training[snp_index])
    return stats