Example #1
    def find_coord_bin(self, coord):
        """Find the bins that contain the specified coord (X, Y) pairs.

        Parameters
        ----------
        coord : `~astropy.coordinates.Angle`
            Array of coord (X, Y) pairs to search for.

        Returns
        -------
        bin_index : `~numpy.ndarray`
            Array of integers with the indices (x, y) of the coord
            bin containing the specified coord (X, Y) pair.
        """
        # check that the specified coord is within the boundaries of the cube
        coord_extent = self.image_extent
        check_x_lo = (coord_extent[0] <= coord[0]).all()
        check_x_hi = (coord[0] < coord_extent[1]).all()
        check_y_lo = (coord_extent[2] <= coord[1]).all()
        check_y_hi = (coord[1] < coord_extent[3]).all()
        if not (check_x_lo and check_x_hi) or not (check_y_lo and check_y_hi):
            raise ValueError("Specified coord {0} is outside the boundaries {1}."
                             .format(coord, coord_extent))

        bin_index_x = np.searchsorted(self.coordx_edges[1:], coord[0])
        bin_index_y = np.searchsorted(self.coordy_edges[1:], coord[1])

        return np.array([bin_index_x, bin_index_y])
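A minimal standalone sketch (not from the project above) of the bin-lookup idiom used here, with made-up edge values: searching the upper edges (edges[1:]) maps a value inside the image extent to the index of the bin that contains it.

import numpy as np

edges = np.array([0.0, 1.0, 2.5, 4.0])    # hypothetical bin edges (3 bins)
values = np.array([0.3, 1.7, 3.9])        # assumed to lie inside [edges[0], edges[-1])
bin_index = np.searchsorted(edges[1:], values)
# bin_index -> array([0, 1, 2]): the containing bin for each value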
Example #2
    def _get_single_element(self, row, col):
        M, N = self.shape
        if (row < 0):
            row += M
        if (col < 0):
            col += N
        if not (0 <= row < M) or not (0 <= col < N):
            raise IndexError("index out of bounds: 0<=%d<%d, 0<=%d<%d" %
                             (row, M, col, N))

        major_index, minor_index = self._swap((row, col))

        start = self.indptr[major_index]
        end = self.indptr[major_index + 1]

        if self.has_sorted_indices:
            # Copies may be made, if dtypes of indices are not identical
            minor_index = self.indices.dtype.type(minor_index)
            minor_indices = self.indices[start:end]
            insert_pos_left = np.searchsorted(
                minor_indices, minor_index, side='left')
            insert_pos_right = insert_pos_left + np.searchsorted(
                minor_indices[insert_pos_left:], minor_index, side='right')
            return self.data[start + insert_pos_left:
                             start + insert_pos_right].sum(dtype=self.dtype)
        else:
            return np.compress(minor_index == self.indices[start:end],
                               self.data[start:end]).sum(dtype=self.dtype)
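A small sketch (made-up data, not scipy's code) of the trick above: with sorted indices, the side='left'/side='right' pair brackets every occurrence of the requested column, so duplicate entries can be summed with a single slice; the two-step search in the method is an equivalent optimisation.

import numpy as np

indices = np.array([0, 2, 2, 5])         # sorted column indices of one row
data = np.array([1.0, 3.0, 4.0, 2.0])    # corresponding stored values
col = 2
lo = np.searchsorted(indices, col, side='left')    # 1
hi = np.searchsorted(indices, col, side='right')   # 3
value = data[lo:hi].sum()                          # 7.0: duplicate entries are summed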
Example #3
    def _get_radius_neighbors(self, query, max_depth, bin_queries, radius):
        """Finds radius neighbors from the candidates obtained.

        Their distances from query are smaller than radius.
        Returns radius neighbors and distances.
        """
        ratio_within_radius = 1
        threshold = 1 - self.radius_cutoff_ratio
        total_candidates = np.array([], dtype=int)
        total_neighbors = np.array([], dtype=int)
        total_distances = np.array([], dtype=float)

        while max_depth > self.min_hash_match and ratio_within_radius > threshold:
            left_mask = self._left_mask[max_depth]
            right_mask = self._right_mask[max_depth]
            candidates = []
            for i in range(self.n_estimators):
                start, stop = _find_matching_indices(self.trees_[i], bin_queries[i], left_mask, right_mask)
                candidates.extend(self.original_indices_[i][start:stop].tolist())
            candidates = np.setdiff1d(candidates, total_candidates)
            total_candidates = np.append(total_candidates, candidates)
            ranks, distances = self._compute_distances(query, candidates)
            m = np.searchsorted(distances, radius, side="right")
            positions = np.searchsorted(total_distances, distances[:m])
            total_neighbors = np.insert(total_neighbors, positions, candidates[ranks[:m]])
            total_distances = np.insert(total_distances, positions, distances[:m])
            ratio_within_radius = total_neighbors.shape[0] / float(total_candidates.shape[0])
            max_depth = max_depth - 1
        return total_neighbors, total_distances
Example #4
    def map_to_external_reference(self, roi, refname='HXB2', in_patient=True):
        '''
        Return a map of positions in the patient to a genome-wide reference.
        Args:
            roi -- region of interest given as a string or a tuple (start, end)
            refname -- reference to compare to
            in_patient -- whether (start, end) refers to patient or reference coordinates
        Returns:
            a (len(roi), 3) array with reference coordinates in the first column,
            patient coordinates in the second column,
            and roi coordinates in the third column
        '''
        from .filenames import get_coordinate_map_filename
        coo_fn = get_coordinate_map_filename(self.name, 'genomewide', refname=refname)
        genomewide_map = np.loadtxt(coo_fn, dtype=int)

        if roi in self.annotation:
            roi_pos = np.array([x for x in self.annotation[roi]], dtype = int)
            ind = np.in1d(genomewide_map[:,1], roi_pos)
            roi_indices = np.in1d(roi_pos, genomewide_map[:,1]).nonzero()[0]
            return np.vstack((genomewide_map[ind].T, [roi_indices])).T

        elif roi == "genomewide":
            return np.vstack((genomewide_map.T, [genomewide_map[:,1]])).T            

        else:
            try:
                start, stop = map(int, roi)
                start_ind = np.searchsorted(genomewide_map[:,in_patient], start)
                stop_ind = np.searchsorted(genomewide_map[:,in_patient], stop)
                return np.vstack((genomewide_map[start_ind:stop_ind].T,
                                  [genomewide_map[start_ind:stop_ind, in_patient] - start])).T
            except:
                raise ValueError("ROI not understood")
Example #5
    def __init__(self, A, fraction=0.80):
        assert 0 <= fraction <= 1
        # A = U . diag(d) . Vt, O( m n^2 ), lapack_lite --
        self.U, self.d, self.Vt = np.linalg.svd(A, full_matrices=False)
        # different versions of numpy can return U and Vt such that
        # U * Vt is constant but the signs may be switched. Gross...
        # numpy, you owe me one day buster!
        # force a check here...
        if self.Vt[0, 0] < 0:
            self.Vt *= -1.0
            self.U *= -1.0
        assert np.all(self.d[:-1] >= self.d[1:])  # sorted
        self.eigen = self.d ** 2
        self.sumvariance = np.cumsum(self.eigen)
        try:
            self.sumvariance /= self.sumvariance[-1]
        except:
            print len(A), len(self.sumvariance), len(self.eigen)
            raise

        self.npc = np.searchsorted(self.sumvariance, fraction) + 1
        while self.npc == 1:  # prevents less than 2 pcs being found
            fraction *= 1.1
            self.npc = np.searchsorted(self.sumvariance, fraction) + 1
        self.dinv = np.array([1 / d if d > self.d[0] * 1e-6 else 0 for d in self.d])
Example #6
def spatio_temporal_src_connectivity(src, n_times):
    """Compute connectivity for a source space activation over time

    Parameters
    ----------
    src : source space
        The source space.

    n_times : int
        Number of time instants

    Returns
    -------
    connectivity : sparse COO matrix
        The connectivity matrix describing the spatio-temporal
        graph structure. If N is the number of vertices in the
        source space, the first N nodes in the graph are the
        vertices at time 1, the nodes from N+1 to 2N are the vertices
        at time 2, etc.

    """
    if src[0]['use_tris'] is None:
        raise Exception("The source space does not appear to be an ico "
                        "surface. Connectivity cannot be extracted from "
                        "non-ico source spaces.")
    lh_tris = np.searchsorted(np.unique(src[0]['use_tris']),
                              src[0]['use_tris'])
    rh_tris = np.searchsorted(np.unique(src[1]['use_tris']),
                              src[1]['use_tris'])
    tris = np.concatenate((lh_tris, rh_tris + np.max(lh_tris) + 1))
    return spatio_temporal_tris_connectivity(tris, n_times)
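The np.searchsorted(np.unique(a), a) pattern above renumbers arbitrary vertex ids onto a dense 0..K-1 range while preserving the triangle structure; a tiny sketch with invented ids:

import numpy as np

use_tris = np.array([[3, 7, 42], [7, 42, 100]])    # hypothetical vertex ids
dense = np.searchsorted(np.unique(use_tris), use_tris)
# dense -> [[0, 1, 2], [1, 2, 3]]: same connectivity, vertices renumbered 0..3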
Example #7
    def test_constant_interpolation_basic(self):
        """Interpolation library works for piecewise constant function
        """

        # Define pixel centers along each direction
        x = numpy.array([1.0, 2.0, 4.0])
        y = numpy.array([5.0, 9.0])

        # Define nx by ny array with corresponding values
        A = numpy.zeros((len(x), len(y)))

        # Define values for each x, y pair as a linear function
        for i in range(len(x)):
            for j in range(len(y)):
                A[i, j] = linear_function(x[i], y[j])

        # Then test that interpolated points are always assigned value of
        # closest neighbour
        xis = numpy.linspace(x[0], x[-1], 10)
        etas = numpy.linspace(y[0], y[-1], 10)
        points = combine_coordinates(xis, etas)

        vals = interpolate2d(x, y, A, points, mode='constant')

        # Find upper neighbours for each interpolation point
        xi = points[:, 0]
        eta = points[:, 1]
        idx = numpy.searchsorted(x, xi, side='left')
        idy = numpy.searchsorted(y, eta, side='left')

        # Get the four neighbours for each interpolation point
        x0 = x[idx - 1]
        x1 = x[idx]
        y0 = y[idy - 1]
        y1 = y[idy]

        z00 = A[idx - 1, idy - 1]
        z01 = A[idx - 1, idy]
        z10 = A[idx, idy - 1]
        z11 = A[idx, idy]

        # Location coefficients
        alpha = (xi - x0) / (x1 - x0)
        beta = (eta - y0) / (y1 - y0)

        refs = numpy.zeros(len(vals))
        for i in range(len(refs)):
            if alpha[i] < 0.5 and beta[i] < 0.5:
                refs[i] = z00[i]

            if alpha[i] >= 0.5 and beta[i] < 0.5:
                refs[i] = z10[i]

            if alpha[i] < 0.5 and beta[i] >= 0.5:
                refs[i] = z01[i]

            if alpha[i] >= 0.5 and beta[i] >= 0.5:
                refs[i] = z11[i]

        assert numpy.allclose(vals, refs, rtol=1e-12, atol=1e-12)
Example #8
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that their positions in the grid of
    # clf.alphas_ do not differ by more than 1
    assert_true(np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.cv_mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(), significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
Example #9
File: Util.py Project: omosola/APGL
    def randomChoice(V, n=1):
        """
        Make a random choice from a vector V of values which are unnormalised
        probabilities. Return the corresponding index. For example if v = [1, 2, 4]
        then the probabilities of the indices are respectively [1/7, 2/7, 4/7]. The
        parameter n is the number of random choices to make. If V is a matrix,
        then the rows are taken as probabilities, and a choice is made for each
        row. 
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.shape[0]==0:
            return -1 

        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            p = numpy.random.rand(n)*cumV[-1]
            return numpy.searchsorted(cumV, p)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            P = numpy.random.rand(V.shape[0], n)*numpy.array([cumV[:, -1]]).T

            inds = numpy.zeros(P.shape, numpy.int)
            for i in range(P.shape[0]):
                inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])

            return inds
        else:
            raise ValueError("Invalid number of dimensions")
Example #10
def window_index_time(t,windowsize,overlap):
    """
    Determines the indices for window start and end points of a time vector
    
    The time vector does not need to be evenly spaced
    
    Inputs:
        t - list or array of datetime objects
        windowsize - length of the window [seconds]
        overlap - number of overlap points [seconds]
        
    Returns: pt1,pt2 the start and end indices of each window
    """
    
    tsec = othertime.SecondsSince(t)
        
    t1=tsec[0]
    t2=t1 + windowsize
    pt1=[0]
    pt2=[np.searchsorted(tsec,t2)]
    while t2 < tsec[-1]:
        t1 = t2 - overlap
        t2 = t1 + windowsize

        pt1.append(np.searchsorted(tsec,t1))
        pt2.append(np.searchsorted(tsec,t2))
        
    return pt1, pt2
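The core step above is a sorted lookup: given an ascending vector of elapsed seconds, np.searchsorted returns the index of the first sample at or after a window boundary. A minimal sketch with fabricated times:

import numpy as np

tsec = np.array([0.0, 10.0, 20.0, 30.0, 40.0])   # seconds since the first sample
window_end = 25.0
idx = np.searchsorted(tsec, window_end)          # 3: first sample at or after 25 s
window = tsec[0:idx]                             # samples inside [0, 25)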
Example #11
def get_peaks(sub_gene_df, top_s, max_dist, feature_name):
    """
    For each gene in gene_info, get the peaks within max_dist in top_s.
    This is essentially reverse engineering: it recovers the peak info for
    each gene that was found to be associated with a peak.
    The reason for reverse engineering rather than storing this information
    when searching for the genes for each peak is that we want to use
    precisely the same function to search the genes for the real data and
    for the permutations.


    Input:
    gene_info ... data frame with index ('chrom','start')
                and columns 'gene_id' and 'end'
    top_s ... series of peak positions with index (chrom, pos)
                and values peak height
    max_dist ... maximum distance between gene and peak
    """
    gene_info = sub_gene_df

    def get_dist(df, gene_pos):
        """
        calculate distance
        """
        s = pd.Series(df.index.droplevel(0).values - gene_pos.ix[df.index[0][0]], index=df.index.droplevel(0).values)
        return s

    tot_gene_peaks_df = pd.DataFrame()
    if not top_s.index.is_monotonic:
        top_s = top_s.sortlevel([0, 1])
    if not gene_info.index.is_monotonic:
        gene_info = gene_info.sort_index()
    for chrom in gene_info.index.droplevel(1).unique():
        loc_top_s = top_s.ix[chrom]
        start = np.searchsorted(loc_top_s.index.values + max_dist, gene_info.ix[chrom].index.values)
        end = np.searchsorted(loc_top_s.index.values - max_dist, gene_info.ix[chrom]["end"].values)
        x = pd.concat(
            [loc_top_s.iloc[st:ed] for st, ed in zip(start, end)], keys=gene_info.ix[chrom][feature_name].values
        )
        x.name = "peak_height"

        dist_start = x.groupby(lambda i: i[0]).apply(
            lambda df: get_dist(df, gene_info.ix[chrom].reset_index().set_index(feature_name)["start"])
        )
        dist_start.name = "dist_start"
        dist_end = x.groupby(lambda i: i[0]).apply(
            lambda df: get_dist(df, gene_info.ix[chrom].set_index(feature_name)["end"])
        )
        dist_end.name = "dist_end"
        gene_peaks_df = pd.concat([x, dist_start, dist_end], axis=1)
        gene_peaks_df.index = pd.MultiIndex.from_arrays(
            [gene_peaks_df.index.droplevel(1), [chrom] * len(x), gene_peaks_df.index.droplevel(0)]
        )
        tot_gene_peaks_df = pd.concat([tot_gene_peaks_df, gene_peaks_df], axis=0)

    tot_gene_peaks_df.index.names = [feature_name, "chrom", "peak_pos"]
    return tot_gene_peaks_df
Example #12
    def derivatives(self, x, der):
        """Evaluate a derivative of the piecewise polynomial
        Parameters
        ----------
        x : scalar or array-like of length N
        der : integer
            how many derivatives (including the function value as
            0th derivative) to extract

        Returns
        -------
        y : array-like of shape der by R or der by N or der by N by R

        """
        if _isscalar(x):
            pos = np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n-2)
            y = self.polynomials[pos].derivatives(x,der=der)
        else:
            x = np.asarray(x)
            m = len(x)
            pos = np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n-2)
            if self.vector_valued:
                y = np.zeros((der,m,self.r))
            else:
                y = np.zeros((der,m))
            for i in xrange(self.n-1):
                c = pos==i
                y[:,c] = self.polynomials[i].derivatives(x[c],der=der)
        return y
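Locating the active piece uses np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n - 2), which maps any x, including values at or beyond the breakpoints, onto a valid interval index; a quick sketch with toy breakpoints:

import numpy as np

xi = np.array([0.0, 1.0, 2.0, 3.0])          # breakpoints: n = 4, i.e. 3 pieces
x = np.array([-0.5, 0.0, 1.5, 3.0, 4.0])
pos = np.clip(np.searchsorted(xi, x) - 1, 0, len(xi) - 2)
# pos -> array([0, 0, 1, 2, 2]); out-of-range points are clamped to the end pieces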
Example #13
 def _regrid_indices(cells, depth, points):
     # Calculate the minimum difference in cell extent.
     extent = np.min(np.diff(cells))
     if extent == 0:
         # Detected a dimension coordinate with an invalid
         # zero-length cell extent.
         msg = 'The target grid cube {} ({!r}) coordinate contains ' \
             'a zero length cell extent.'
         axis, name = 'x', tx.name()
         if points is sy_points:
             axis, name = 'y', ty.name()
         raise ValueError(msg.format(axis, name))
     elif extent > 0:
         # The cells of the dimension coordinate are in ascending order.
         indices = np.searchsorted(cells, points, side='right') - 1
     else:
         # The cells of the dimension coordinate are in descending order.
         # np.searchsorted() requires ascending order, so we need to
         # account for this restriction.
         cells = cells[::-1]
         right = np.searchsorted(cells, points, side='right')
         left = np.searchsorted(cells, points, side='left')
         indices = depth - right
         # Only those points that exactly match the left-hand cell bound
         # will differ between 'left' and 'right'. Thus their target cell
         # location needs to be recalculated to give the correct descending
         # [upper, lower) interval cell for source-to-target regrid behaviour.
         delta = np.where(left != right)[0]
         if delta.size:
             indices[delta] = depth - left[delta]
     return indices
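np.searchsorted requires ascending input, so descending cells are handled above by reversing the array and mapping the result back. A generic sketch of that idiom, independent of the regrid bookkeeping:

import numpy as np

desc = np.array([40.0, 30.0, 20.0, 10.0])   # descending values
point = 25.0
asc = desc[::-1]
pos_asc = np.searchsorted(asc, point)       # insertion point in the ascending view
pos_desc = len(desc) - pos_asc              # equivalent insertion point in the original order
# np.insert(desc, pos_desc, point) -> [40., 30., 25., 20., 10.], still descending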
Example #14
    def get_tmax(self, p, cutoff=None):
        if cutoff is None:
            cutoff = self.cutoff

        if self.quad:
            x = np.arange(1, 10000, 1)
            y = np.zeros_like(x)
            func = self.function(x, p)
            func_half = self.function(x[:-1] + 1 / 2, p)
            y[1:] = y[0] + np.cumsum(1 / 6 *
                                     (func[:-1] + 4 * func_half + func[1:]))
            y = y / quad(self.function, 0, np.inf, args=p)[0]
            return np.searchsorted(y, cutoff)

        else:
            t1 = -np.sqrt(3 / 5)
            t2 = 0
            t3 = np.sqrt(3 / 5)
            w1 = 5 / 9
            w2 = 8 / 9
            w3 = 5 / 9

            x = np.arange(1, 10000, 1)
            y = np.zeros_like(x)
            func = self.function(x, p)
            func_half = self.function(x[:-1] + 1 / 2, p)
            y[0] = 0.5 * (w1 * self.function(0.5 * t1 + 0.5, p) +
                          w2 * self.function(0.5 * t2 + 0.5, p) +
                          w3 * self.function(0.5 * t3 + 0.5, p))
            y[1:] = y[0] + np.cumsum(1 / 6 *
                                     (func[:-1] + 4 * func_half + func[1:]))
            y = y / quad(self.function, 0, np.inf, args=p)[0]
            return np.searchsorted(y, cutoff)
Example #15
    def __call__(self, x):
        """Evaluate the piecewise polynomial

        Parameters
        ----------
        x : scalar or array-like of length N

        Returns
        -------
        y : scalar or array-like of length R or length N or N by R
        """
        if _isscalar(x):
            pos = np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n-2)
            y = self.polynomials[pos](x)
        else:
            x = np.asarray(x)
            m = len(x)
            pos = np.clip(np.searchsorted(self.xi, x) - 1, 0, self.n-2)
            if self.vector_valued:
                y = np.zeros((m,self.r))
            else:
                y = np.zeros(m)
            for i in xrange(self.n-1):
                c = pos==i
                y[c] = self.polynomials[i](x[c])
        return y
Example #16
def downsample(image, image_x_axis, image_y_axis,
               x_bounds, y_bounds, x_resolution, y_resolution):

    x_resolution, y_resolution = int(round(x_resolution)), int(round(y_resolution))
    x_bounds = [x_bounds.start, x_bounds.end]
    y_bounds = [y_bounds.start, y_bounds.end]
    x_bounds = np.searchsorted(image_x_axis, x_bounds)
    y_bounds = np.searchsorted(image_y_axis, y_bounds)
    #y_bounds = image.shape[0] + 1 - y_bounds[::-1]

    if x_resolution == 0 or y_resolution == 0:
        subset = np.zeros((1,1), dtype=image.dtype)
    else:
        subset = image[y_bounds[0]:y_bounds[1],
                       x_bounds[0]:x_bounds[1]]
        x_downsample_factor = max(round(subset.shape[1] / x_resolution / 3.), 1)
        y_downsample_factor = max(round(subset.shape[0] / y_resolution / 3.), 1)
        subset = subset[::x_downsample_factor, ::y_downsample_factor]
        image = scipy.misc.imresize(subset, (x_resolution, y_resolution),
                            interp='nearest')

    bounds = image_x_axis[x_bounds[0]:x_bounds[1]]
    dw = np.max(bounds) - np.min(bounds)
    bounds = image_y_axis[y_bounds[0]:y_bounds[1]]
    dh = np.max(bounds) - np.min(bounds)
    return {'data': subset,
            'offset_x': image_x_axis[x_bounds[0]],
            'offset_y': image_y_axis[y_bounds[0]],
            'dw': dw,
            'dh': dh,
    }
Example #17
 def get_indices(ival):
     """ Retuns the indeces surrounding the given interval"""
     start_ind = np.searchsorted(self.x, ival[0], side='right')
     end_ind = np.searchsorted(self.x, ival[1], side='left')
     assert start_ind > 0 and end_ind < len(self.x), \
         "Invalid averaging interval"
     return start_ind, end_ind
Example #18
	def __interpFunction_BoundaryConstant(self, x, y):
		xNextIndex = np.searchsorted(self.xData, x)	
		if (xNextIndex == 0):
			xNextIndex = 0
			xPrevIndex = xNextIndex
			xCoeff = 0
		elif (xNextIndex >= len(self.xData)):
			xNextIndex = xNextIndex - 1
			xPrevIndex = xNextIndex
			xCoeff = 0
		else:
			xPrevIndex = xNextIndex-1
			xCoeff = (x-self.xData[xPrevIndex])/(self.xData[xNextIndex]-self.xData[xPrevIndex])
		
		yNextIndex = np.searchsorted(self.yData, y)
		if (yNextIndex == 0):
			yNextIndex = 0
			yPrevIndex = yNextIndex
			yCoeff = 0
		elif (yNextIndex >= len(self.yData)):
			yNextIndex = yNextIndex - 1
			yPrevIndex = yNextIndex
			yCoeff = 0
		else:
			yPrevIndex = yNextIndex-1
			yCoeff = (y-self.yData[yPrevIndex])/(self.yData[yNextIndex]-self.yData[yPrevIndex])
		
		value1 = self.zData[yPrevIndex][xPrevIndex] + xCoeff*(self.zData[yPrevIndex][xNextIndex]-self.zData[yPrevIndex][xPrevIndex])
		value2 = self.zData[yNextIndex][xPrevIndex] + xCoeff*(self.zData[yNextIndex][xNextIndex]-self.zData[yNextIndex][xPrevIndex])
		interpValue = value1 + yCoeff*(value2 - value1)
		return interpValue
Example #19
def evaluation_pairs(det_chords, ann_chords):
    """
    Match detected with annotated chords and create paired label segments
    for evaluation.

    Parameters
    ----------
    det_chords : numpy structured array
        Chord detections with 'start' and 'end' fields.
    ann_chords : numpy structured array
        Chord annotations with 'start' and 'end' fields.

    Returns
    -------
    annotations : numpy structured array
        Annotated chords of evaluation segments.
    detections : numpy structured array
        Detected chords of evaluation segments.
    durations : numpy array
        Durations of evaluation segments.

    """
    times = np.unique(np.hstack([ann_chords['start'], ann_chords['end'],
                                 det_chords['start'], det_chords['end']]))

    durations = times[1:] - times[:-1]
    annotations = ann_chords['chord'][
        np.searchsorted(ann_chords['start'], times[:-1], side='right') - 1]
    detections = det_chords['chord'][
        np.searchsorted(det_chords['start'], times[:-1], side='right') - 1]

    return annotations, detections, durations
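The lookups above use a common idiom: with sorted segment start times, np.searchsorted(starts, t, side='right') - 1 returns the index of the segment active at time t. A small sketch with invented chord segments:

import numpy as np

starts = np.array([0.0, 2.0, 5.0])                 # segment start times
labels = np.array(['C:maj', 'G:maj', 'A:min'])
t = np.array([0.5, 2.0, 6.3])
active = labels[np.searchsorted(starts, t, side='right') - 1]
# active -> ['C:maj', 'G:maj', 'A:min']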
Example #20
def BayesianWords(unigram_counts, bigram_counts, n_words):
  unigrams, ucounts = zip(*sorted(filter(
      lambda (k, v): k in bigram_counts,
      unigram_counts.items())))
  prior = np.array(ucounts) / sum(ucounts)
  prior_pdf = np.array([np.sum(prior[:n]) for n in range(len(unigrams))])

  bigram_pdfs = {}
  for w1, w1_bgcnts in bigram_counts.iteritems():
    w2strs, w2counts = zip(*sorted(w1_bgcnts.items()))
    w2pdf = np.array(w2counts) / sum(w2counts)
    bigram_pdfs[w1] = (
        w2strs,
        np.array([np.sum(w2pdf[:n]) for n in range(len(w2strs))]))
    #print '%d bigrams for %s' % (len(w2strs), w1)

  first_word_index = np.searchsorted(prior_pdf, np.random.random_sample())
  words = [unigrams[min(len(unigrams)-1, first_word_index)]]
  for n in range(1, n_words):
    if words[-1] in bigram_pdfs:
      bigram_strs, bigram_pdf = bigram_pdfs[words[-1]]
      idx = np.searchsorted(bigram_pdf, np.random.random_sample())
      words.append(bigram_strs[min(len(bigram_strs)-1, idx)])
    else:
      # Pick from the prior.
      idx = np.searchsorted(prior_pdf, np.random.random_sample())
      words.append(unigrams[min(len(unigrams)-1, idx)])
  return words
Example #21
    def interp(self, rho, temp, log=False):
        dens = self.dens
        temps = self.temps

        if log == True:
            rho   = np.log10(rho)
            temp  = np.log10(temp)
            dens  = np.log10(dens)
            temps = np.log10(temps)

        # First, find the temperature/density cell we are in.
        # The opacity will be computed using densities:
        #   dens[jd-1], dens[jd]
        # and temperatures:
        #   temp[jt-1], temp[jt]

        jd = np.searchsorted(dens, rho)
        if jd == 0: 
            rho = dens[0]
            jd += 1

        if jd == len(dens): 
            jd = jd - 1
            rho = dens[-1]

        jt = np.searchsorted(temps, temp)
        if jt == 0: 
            temp = temps[0]
            jt += 1

        if jt == len(temps): 
            jt = jt - 1
            temp = temps[-1]

        # Now that the surrounding temperatures/densities have been
        # identified, the interpolation coefficients can be computed.
        # c1 -> weight for dens[jd-1] and temp[jt-1]
        # c2 -> weight for dens[jd]   and temp[jt-1]
        # c3 -> weight for dens[jd-1] and temp[jt]
        # c4 -> weight for dens[jd]   and temp[jt]

        d1 = dens[jd-1]
        d2 = dens[jd]
        t1 = temps[jt-1]
        t2 = temps[jt]
                
        delta = (rho-d1)/(d2-d1)
        tau   = (temp-t1)/(t2-t1)

        c1 = (delta-1.0)*(tau-1.0)
        c2 = delta*(1-tau)
        c3 = tau*(1-delta)
        c4 = delta * tau

        # Compute the interpolated opacity:
        return \
            c1 * self.go(jd-1,jt-1) + \
            c2 * self.go(jd  ,jt-1) + \
            c3 * self.go(jd-1,jt  ) + \
            c4 * self.go(jd  ,jt  )
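The four coefficients above are the standard bilinear weights; for delta, tau in [0, 1] they sum to one, since (delta-1)(tau-1) + delta(1-tau) + tau(1-delta) + delta*tau = 1. A quick numeric spot check:

delta, tau = 0.25, 0.6
c1 = (delta - 1.0) * (tau - 1.0)   # 0.30
c2 = delta * (1 - tau)             # 0.10
c3 = tau * (1 - delta)             # 0.45
c4 = delta * tau                   # 0.15
assert abs(c1 + c2 + c3 + c4 - 1.0) < 1e-12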
Example #22
def get_features(peak_s, feature_df, feature_name='feature', max_dist=0):
    """
    Take the input series and get the
    names of nearby features.

    Input:
    peak_s ... pandas series with (chrom, pos) index and value of
                the statistic ('peak height'). Series should be named.
    feature_df ... data frame with feature info.
    """
    all_features = []
    if not feature_df.index.is_monotonic:
        feature_df = feature_df.sort_index()
    tot_hit_df = pd.DataFrame()
    for chrom in peak_s.index.droplevel(1).unique():
        loc_feature_df = feature_df.ix[chrom]
        #loc_feature_df = loc_feature_df.append(pd.DataFrame(np.nan,index=[np.inf],columns=loc_feature_df.columns))
        #print loc_feature_df.index-max_dist, peak_s.ix[chrom].index.values
        #try:
        pos_rel_to_start = np.searchsorted(loc_feature_df.index.values-max_dist,peak_s.ix[chrom].index.values)
        #except:
        #    print chrom, peak_s.ix[chrom]
        pos_rel_to_end = np.searchsorted(loc_feature_df["end"].values+max_dist,peak_s.ix[chrom].index.values)
        features = list(set(loc_feature_df[feature_name].iloc[np.hstack([range(a,b) for a,b in zip(pos_rel_to_end,pos_rel_to_start)])]))
        all_features += features
    return all_features
Example #23
def _linearslice(linearbpf, x0, x1):
    """
    Slice the given bpf, returning a new Linear bpf with endpoints
    x0 and x1.
    """
    assert isinstance(linearbpf, core.Linear)
    X, Y = linearbpf.points()
    insert_head = x0 > X[0]
    if insert_head:
        i = np.searchsorted(X, x0)
        X = X[i-1:]
        Y = Y[i-1:]
    insert_tail = x1 < X[-1]
    if insert_tail:
        i = np.searchsorted(X, x1)
        X = X[:i+1]
        Y = Y[:i+1]
    if insert_head or insert_tail:
        # we copy when we know exactly how much to copy
        X = X.copy()
        Y = Y.copy()
    if insert_head:
        X[0] = x0
        Y[0] = linearbpf(x0)
    if insert_tail:
        X[i] = x1
        Y[i] = linearbpf(x1)    
    return core.Linear(X, Y)
Example #24
    def get_exclude_coords(self, ex_starts, ex_ends):
                                                    
        mx=self.starts.shape[0]-1
        n_exclude = len(ex_ends)     
        ex_wnd_starts = np.searchsorted(self.starts, ex_starts)
        ex_wnd_ends   = np.searchsorted(self.ends, ex_ends)
        ex_wnd_starts = np.amax(np.c_[ex_wnd_starts-1,np.zeros(n_exclude)],1).astype(int)
        ex_wnd_ends = np.amin(np.c_[ex_wnd_ends+1,np.ones(n_exclude)*mx],1).astype(int)
        ex_starts = self.starts[ex_wnd_starts] 
        ex_ends = self.ends[ex_wnd_ends] 

        ex_coords = [] 
        
        curr_s = ex_starts[0]
        curr_e = ex_ends[0]

        #print ex_wnd_starts
        #print ex_wnd_ends

        for i in xrange(1, n_exclude):
            if ex_starts[i] < curr_e:
                curr_e = ex_ends[i]
            else:
                ex_coords.append(tuple([curr_s,curr_e]))
                curr_s = ex_starts[i]
                curr_e = ex_ends[i]
        
        ex_coords.append(tuple([curr_s,curr_e]))
        return ex_coords
Example #25
def build3DHistogramArray(inputA=None,xBinVector=None,yBinVector=None):
    """
    This builds and bins up the DCPD exposure surface.
    The input is a time parameterized array
    [beamlist,beampitchlist,beamyawlist]
    """
    threeDData=list()
    for i,aCol0 in enumerate(inputA[0]):
        threeDData.append((inputA[0][i],\
                           inputA[1][i],\
                           inputA[2][i]))
    #Sort the input structure by col0
    threeDData.sort()
    zMapLists=empty([len(xBinVector),len(yBinVector)],dtype=object)
    for ii in range(0,len(xBinVector)):
        for jj in range(0,len(yBinVector)):
            zMapLists[ii][jj]=list()
    for xIndex in range(0,len(xBinVector)-1):
        xLow=xBinVector[xIndex]
        xHigh=xBinVector[xIndex+1]
        xData=[a for a,b,c in threeDData]
        # Find all points that fit this X column
        dataSubset=threeDData[searchsorted(xData,xLow,side='left'):\
                              searchsorted(xData,xHigh,side='right')]
        yDataSubset=[(b,c) for a,b,c in dataSubset]
        yDataSubset.sort()
        yData=[b for b,c in yDataSubset]
        for yIndex in range(0,len(yBinVector)-1):
            yLow=yBinVector[yIndex]
            yHigh=yBinVector[yIndex+1]
            dataBinMatch=yDataSubset[searchsorted(yData,yLow,side='left'):\
                                     searchsorted(yData,yHigh,side='right')]
            zDataMatch=[c for b,c in dataBinMatch]
            zMapLists[xIndex][yIndex].extend(zDataMatch)
    return zMapLists
Example #26
def get_scx_scz_in_timerange(timerange, file):
    """
    read a downloaded FERMI weekly pointing file and extract scx, scz for a timerange.

    Parameters
    ----------

    timerange : time range
        A time range object (with `start` and `end` attributes) specifying the
        interval over which to extract the spacecraft pointing.
    file : str
        A filepath to a Fermi/LAT weekly pointing file (e.g. as obtained by the
        download_weekly_pointing_file function).
    """
    
    hdulist = fits.open(file)
    timesinutc = []
    for tim in hdulist[1].data['START']:
        timesinutc.append(met_to_utc(tim))

    startind = np.searchsorted(timesinutc, timerange.start)
    endind = np.searchsorted(timesinutc, timerange.end)

    scx_radec = []
    scz_radec = []
    for i in range(startind, endind):
        scx_radec.append((Longitude(hdulist[1].data['RA_SCX'][i]*u.deg),
                          Latitude(hdulist[1].data['DEC_SCX'][i]*u.deg)))
        scz_radec.append((Longitude(hdulist[1].data['RA_SCZ'][i]*u.deg),
                          Latitude(hdulist[1].data['DEC_SCZ'][i]*u.deg)))
    return scx_radec, scz_radec, timesinutc[startind:endind]
Example #27
def kuiper_two(data1, data2):
    """Compute the Kuiper statistic to compare two samples.

    Parameters
    ----------
    data1 : array-like
        The first set of data values.
    data2 : array-like
        The second set of data values.
    
    Returns
    -------
    D : float
        The raw test statistic.
    fpp : float
        The probability of obtaining two samples this different from
        the same distribution.

    Notes
    -----
    Warning: the fpp is quite approximate, especially for small samples.

    """
    data1, data2 = sort(data1), sort(data2)

    if len(data2)<len(data1):
        data1, data2 = data2, data1

    cdfv1 = searchsorted(data2, data1)/float(len(data2)) # this could be more efficient
    cdfv2 = searchsorted(data1, data2)/float(len(data1)) # this could be more efficient
    D = (amax(cdfv1-arange(len(data1))/float(len(data1))) + 
            amax(cdfv2-arange(len(data2))/float(len(data2))))

    Ne = len(data1)*len(data2)/float(len(data1)+len(data2))
    return D, kuiper_FPP(D, Ne)
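The searchsorted calls above evaluate empirical CDFs: for sorted data, searchsorted(data, x) / len(data) is the fraction of samples strictly below x (with the default side='left'). A tiny sketch with made-up values:

import numpy as np

data = np.sort(np.array([0.2, 0.5, 0.7, 0.9]))
x = np.array([0.5, 0.8])
ecdf = np.searchsorted(data, x) / float(len(data))
# ecdf -> [0.25, 0.75]: fraction of samples strictly below each x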
Example #28
File: dok.py Project: 87/scipy
 def split(self, cols_or_rows, columns=1):
     # Similar to take but returns two arrays, the extracted columns plus
     # the resulting array.  Assumes cols_or_rows is sorted
     base = dok_matrix()
     ext = dok_matrix()
     indx = int((columns == 1))
     if indx:
         for key in self.keys():
             num = np.searchsorted(cols_or_rows, key[1])
             if cols_or_rows[num] == key[1]:
                 newkey = (key[0], num)
                 ext[newkey] = self[key]
             else:
                 newkey = (key[0], key[1]-num)
                 base[newkey] = self[key]
     else:
         for key in self.keys():
             num = np.searchsorted(cols_or_rows, key[0])
             if cols_or_rows[num] == key[0]:
                 newkey = (num, key[1])
                 ext[newkey] = self[key]
             else:
                 newkey = (key[0]-num, key[1])
                 base[newkey] = self[key]
     return base, ext
Example #29
def skim_imgs(Mimg, Mimg_tabs, Msnp_tabs, t_adjust=0, tb0=SKIMSPK_TB,
        te0=SKIMSPK_TE, n_blk=20000, onlyonce=True):
    if onlyonce:
        idx_eachimg = [np.nonzero(Mimg == i_img)[0][0] for i_img
                in np.unique(Mimg)]
        t_eachimg = Mimg_tabs[idx_eachimg]
        i_eachimg = Mimg[idx_eachimg]
    else:
        t_eachimg = Mimg_tabs
        i_eachimg = Mimg

    ibie = []
    ib = 0
    ie = 0
    for t0 in t_eachimg:
        tb = t0 + tb0 - t_adjust
        te = t0 + te0 - t_adjust

        xb = np.searchsorted(Msnp_tabs[ib: ib + n_blk], tb)
        if xb >= n_blk:
            xb = np.searchsorted(Msnp_tabs[ib:], tb)
        ib += xb

        xe = np.searchsorted(Msnp_tabs[ie: ie + n_blk], te)
        if xe >= n_blk:
            xe = np.searchsorted(Msnp_tabs[ie:], te)
        ie += xe
        ibie.append((ib, ie))
    return ibie, i_eachimg
Example #30
    def _substitute_iers_b(cls, table):
        """Substitute IERS B values with those from a real IERS B table.

        IERS-A has IERS-B values included, but for reasons unknown these
        do not match the latest IERS-B values (see comments in #4436).
        Here, we use the bundled astropy IERS-B table to overwrite the values
        in the downloaded IERS-A table.
        """
        iers_b = IERS_B.open()
        # Substitute IERS-B values for existing B values in IERS-A table
        mjd_b = table['MJD'][~table['UT1_UTC_B'].mask]
        i0 = np.searchsorted(iers_b['MJD'].value, mjd_b[0], side='left')
        i1 = np.searchsorted(iers_b['MJD'].value, mjd_b[-1], side='right')
        iers_b = iers_b[i0:i1]
        n_iers_b = len(iers_b)
        # If there is overlap then replace IERS-A values from available IERS-B
        if n_iers_b > 0:
            # Sanity check that we are overwriting the correct values
            if not np.allclose(table['MJD'][:n_iers_b], iers_b['MJD'].value):
                raise ValueError('unexpected mismatch when copying '
                                 'IERS-B values into IERS-A table.')
            # Finally do the overwrite
            table['UT1_UTC_B'][:n_iers_b] = iers_b['UT1_UTC'].value
            table['PM_X_B'][:n_iers_b] = iers_b['PM_x'].value
            table['PM_Y_B'][:n_iers_b] = iers_b['PM_y'].value

        return table
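The two searchsorted calls above bracket the slice of the IERS-B table whose MJDs overlap the B values present in the IERS-A table; a generic sketch of that pattern with made-up arrays:

import numpy as np

mjd_table = np.array([100, 101, 102, 103, 104, 105])   # sorted MJDs in the full table
mjd_wanted = np.array([102, 103, 104])                  # sorted MJDs to overlay
i0 = np.searchsorted(mjd_table, mjd_wanted[0], side='left')    # 2
i1 = np.searchsorted(mjd_table, mjd_wanted[-1], side='right')  # 5
overlap = mjd_table[i0:i1]                                     # [102, 103, 104]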
Example #31
                                                       vals_col='cumulative')
    temp['per_capita_deaths'] = rona_db.find_per_capita(temp,
                                                        pop_col='popData2018',
                                                        vals_col='cum_deaths')
    temp.reset_index(drop=True, inplace=True)
    countries[country] = temp

full_data = []
for j, date in enumerate(dates):
    date_countries = []
    for i, country in enumerate(countries):
        country_list = [
            str(i)[:10] for i in countries[country]['dateRep'].values
        ]
        if str(date)[:10] in country_list:
            index = np.searchsorted(countries[country]['dateRep'], date)
            y = countries[country]['cumulative'].iloc[:index + 1]
            x = countries[country]['dateRep'].iloc[:index + 1]
            x = [str(i)[:10] for i in x]
            temp = {'country': country, 'x': x, 'y': y}

            date_countries.append(temp)
    full_data.append(date_countries)

# for i, country in enumerate(countries):
#     full_data.append({'country': country, 'x': [], 'y' : []})
#     country_list = [str(i)[:10] for i in countries[country]['dateRep'].values]
#     for j, date in enumerate(dates):
#         if str(date)[:10] in country_list:
#             index = np.searchsorted(countries[country]['dateRep'], date)
#             y = countries[country]['cumulative'].iloc[index]
Example #32
def _process_chunk(db_lock, log_lock, sema, sed_fit_name, cosmoDC2_data,
                   first_gal, self_dict, bad_gals):
    """
    Do all chunk-specific work:  compute table contents for a
    collection of galaxies and write to db

    Parameters
    ----------
    db_lock          Used to avoid conflicts writing to sqlite output
    log_lock         Used to avoid conflicts writing to per-healpixel log
    sema             A semaphore. Release when done
    sed_fit_name     File where sed fits for this healpixel are
    cosmoDC2_data    Values from cosmoDC2 for this healpixel, keyed by
                     column name
    first_gal        index of first galaxy in our chunk (in sed fit list)
    self_dict        Random useful values stored in GalaxyTruthWriter
    bad_gals         List of galaxy ids, monotone increasing, to be
                     skipped
    """

    dry = self_dict['dry']
    chunk_size = self_dict['chunk_size']
    dbfile = self_dict['dbfile']
    logfile = self_dict['logfile']

    if dry:
        _logit(
            log_lock, logfile,
            '_process_chunk invoke for first_gal {}, chunk size {}'.format(
                first_gal, chunk_size))
        if sema is None:
            return
        sema.release()

        #exit(0)
        return

    lsst_bp_dict = self_dict['lsst_bp_dict']
    galaxy_ids = []
    ra = []
    dec = []
    redshift = []
    ebv_vals = None
    ebv_vals_init = False  # does this belong somewhere else?
    ccm_w = None
    total_gals = self_dict['total_gals']

    chunk_start = first_gal
    chunk_end = min(first_gal + chunk_size, total_gals)
    with h5py.File(sed_fit_name, 'r') as sed_fit_file:

        sed_names = sed_fit_file['sed_names'][()]
        sed_names = [s.decode() for s in sed_names]  # because stored as bytes

        gals_this_chunk = chunk_end - chunk_start
        subset = slice(chunk_start, chunk_end)
        galaxy_ids = sed_fit_file['galaxy_id'][()][subset]
        to_log = 'Start with galaxy #{}, id={}\n# galaxies for _process_chunk: {}\n'.format(
            first_gal, galaxy_ids[0], len(galaxy_ids))
        _logit(log_lock, logfile, to_log)

        # get the cross-match between the sed fit and cosmoDC2
        cosmo_len = len(cosmoDC2_data['galaxy_id'])

        crossmatch_dex = np.searchsorted(cosmoDC2_data['galaxy_id'],
                                         galaxy_ids)
        np.testing.assert_array_equal(
            galaxy_ids, cosmoDC2_data['galaxy_id'][crossmatch_dex])

        ra = sed_fit_file['ra'][()][subset]
        dec = sed_fit_file['dec'][()][subset]
        np.testing.assert_array_equal(ra, cosmoDC2_data['ra'][crossmatch_dex])
        np.testing.assert_array_equal(dec,
                                      cosmoDC2_data['dec'][crossmatch_dex])

        good_ixes = _good_indices(galaxy_ids.tolist(), bad_gals[0])
        if (len(good_ixes) == 0):
            if sema is not None:
                sema.release()
            return
        else:
            _logit(
                log_lock, logfile,
                'Found {} good indices for chunk starting with {}\n'.format(
                    len(good_ixes), chunk_start))
        flux_by_band_MW = {}
        flux_by_band_noMW = {}

        # Calculate E(B-V) for dust extinction in Milky Way along relevant
        # lines of sight
        band_print = "Processing band {}, first gal {}, time {}\n"
        if not ebv_vals_init:
            equatorial_coords = np.array([np.radians(ra), np.radians(dec)])
            ebv_model = EBVbase()
            ebv_vals = ebv_model.calculateEbv(
                equatorialCoordinates=equatorial_coords, interp=True)
            ebv_vals_init = True

        for i_bp, bp in enumerate('ugrizy'):
            if (i_bp == 0 or i_bp == 5):
                _logit(log_lock, logfile,
                       band_print.format(bp, first_gal, dt.now()))
            fluxes_noMW = {}
            fluxes = {}
            for component in ['disk', 'bulge']:
                fluxes_noMW[component] = np.zeros(gals_this_chunk, dtype=float)
                fluxes[component] = np.zeros(gals_this_chunk, dtype=float)

            for component in ['disk', 'bulge']:
                #print("   Processing component ", component)
                sed_arr = sed_fit_file['%s_sed' % component][()][subset]
                av_arr = sed_fit_file['%s_av' % component][()][subset]
                rv_arr = sed_fit_file['%s_rv' % component][()][subset]
                mn_arr = sed_fit_file['%s_magnorm' %
                                      component][()][i_bp, :][subset]
                z_arr = cosmoDC2_data['redshift'][crossmatch_dex]
                gii = 0
                done = False
                for i_gal, (s_dex, mn, av, rv, zz, ebv) in enumerate(
                        zip(sed_arr, mn_arr, av_arr, rv_arr, z_arr, ebv_vals)):
                    if done: break
                    while good_ixes[gii] < i_gal:
                        gii += 1
                        if gii == len(good_ixes):  # ran out of good ones
                            done = True
                            break
                    if done: break
                    if good_ixes[gii] > i_gal:  # skipped over it; it's bad
                        continue
                    # Leave space for it in the arrays, but values
                    # for all the fluxes will be left at 0

                    # read in the SED file from the library
                    sed_file_name = os.path.join(self_dict['sed_lib_dir'],
                                                 sed_names[s_dex])
                    sed = sims_photUtils.Sed()
                    sed.readSED_flambda(sed_file_name)

                    # find and apply normalizing flux
                    fnorm = sims_photUtils.getImsimFluxNorm(sed, mn)
                    sed.multiplyFluxNorm(fnorm)

                    # add internal dust
                    if ccm_w is None or not np.array_equal(sed.wavelen, ccm_w):
                        ccm_w = np.copy(sed.wavelen)
                        a_x, b_x = sed.setupCCM_ab()
                    sed.addDust(a_x, b_x, A_v=av, R_v=rv)

                    # apply redshift
                    sed.redshiftSED(zz, dimming=True)

                    # flux, in Janskys, without Milky Way dust extinction
                    f_noMW = sed.calcFlux(lsst_bp_dict[bp])

                    # apply Milky Way dust
                    # (cannot reuse a_x, b_x because wavelength grid changed
                    # when we called redshiftSED)
                    a_x_mw, b_x_mw = sed.setupCCM_ab()
                    sed.addDust(a_x_mw, b_x_mw, R_v=3.1, ebv=ebv)

                    f_MW = sed.calcFlux(lsst_bp_dict[bp])

                    fluxes_noMW[component][i_gal] = f_noMW
                    fluxes[component][i_gal] = f_MW
                if (component == 'disk') and (bp == 'r'):
                    redshift = z_arr

            # Sum components and convert to nanojansky
            total_fluxes = (fluxes_noMW['disk'] + fluxes_noMW['bulge']) * 10**9
            total_fluxes_MW = (fluxes['disk'] + fluxes['bulge']) * 10**9

            dummy_sed = sims_photUtils.Sed()

            # add magnification due to weak lensing
            kappa = cosmoDC2_data['convergence'][crossmatch_dex]
            gamma_sq = (cosmoDC2_data['shear_1'][crossmatch_dex]**2 +
                        cosmoDC2_data['shear_2'][crossmatch_dex]**2)
            magnification = 1.0 / ((1.0 - kappa)**2 - gamma_sq)
            magnified_fluxes = magnification * total_fluxes
            magnified_fluxes_MW = magnification * total_fluxes_MW
            flux_by_band_noMW[bp] = magnified_fluxes
            flux_by_band_MW[bp] = magnified_fluxes_MW

    #  Open connection to sqlite db and write
    #print('Time before db write is {}, first gal={}'.format(dt.now(), first_gal))
    #sys.stdout.flush()
    if not db_lock.acquire(timeout=120.0):
        _logit(log_lock, logfile, "Failed to acquire db lock, first gal=",
               first_gal)
        if sema is None:
            return
        sema.release()
        exit(1)

    try:
        _write_sqlite(dbfile, galaxy_ids, ra, dec, redshift, flux_by_band_MW,
                      flux_by_band_noMW, good_ixes)
        db_lock.release()
        if sema is not None:
            sema.release()

        _logit(
            log_lock, logfile,
            'Time after db write: {}, first_gal={}\n'.format(
                dt.now(), first_gal))
        exit(0)
    except Exception as ex:
        db_lock.release()
        if sema is not None:
            sema.release()
        raise (ex)
Example #33
    def accumulate(self, p=None):
        '''
        Accumulate per image evaluation results and store the result in self.eval
        :param p: input params for evaluation
        :return: None
        '''
        print('Accumulating evaluation results...')
        tic = time.time()
        if not self.evalImgs:
            print('Please run evaluate() first')
        # allow input of customized parameters
        if p is None:
            p = self.params
        p.catIds = p.catIds if p.useCats == 1 else [-1]
        T = len(p.iouThrs)
        R = len(p.recThrs)
        K = len(p.catIds) if p.useCats else 1
        A = len(p.areaRng)
        M = len(p.maxDets)
        precision = -np.ones(
            (T, R, K, A, M))  # -1 for the precision of absent categories
        recall = -np.ones((T, K, A, M))
        scores = -np.ones((T, R, K, A, M))

        # create dictionary for future indexing
        _pe = self._paramsEval
        catIds = _pe.catIds if _pe.useCats else [-1]
        setK = set(catIds)
        setA = set(map(tuple, _pe.areaRng))
        setM = set(_pe.maxDets)
        setI = set(_pe.imgIds)
        # get inds to evaluate
        k_list = [n for n, k in enumerate(p.catIds) if k in setK]
        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        a_list = [
            n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng))
            if a in setA
        ]
        i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
        I0 = len(_pe.imgIds)
        A0 = len(_pe.areaRng)
        # retrieve E at each category, area range, and max number of detections
        for k, k0 in enumerate(k_list):
            Nk = k0 * A0 * I0
            for a, a0 in enumerate(a_list):
                Na = a0 * I0
                for m, maxDet in enumerate(m_list):
                    E = [self.evalImgs[Nk + Na + i] for i in i_list]
                    E = [e for e in E if not e is None]
                    if len(E) == 0:
                        continue
                    dtScores = np.concatenate(
                        [e['dtScores'][0:maxDet] for e in E])

                    # different sorting method generates slightly different results.
                    # mergesort is used to be consistent as Matlab implementation.
                    inds = np.argsort(-dtScores, kind='mergesort')
                    dtScoresSorted = dtScores[inds]

                    dtm = np.concatenate(
                        [e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:,
                                                                          inds]
                    dtIg = np.concatenate(
                        [e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:,
                                                                         inds]
                    gtIg = np.concatenate([e['gtIgnore'] for e in E])
                    npig = np.count_nonzero(gtIg == 0)
                    if npig == 0:
                        continue
                    tps = np.logical_and(dtm, np.logical_not(dtIg))
                    fps = np.logical_and(np.logical_not(dtm),
                                         np.logical_not(dtIg))

                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                        tp = np.array(tp)
                        fp = np.array(fp)
                        nd = len(tp)
                        rc = tp / npig
                        pr = tp / (fp + tp + np.spacing(1))
                        q = np.zeros((R, ))
                        ss = np.zeros((R, ))

                        if nd:
                            recall[t, k, a, m] = rc[-1]
                        else:
                            recall[t, k, a, m] = 0

                        # numpy is slow without cython optimization for accessing elements
                        # use python array gets significant speed improvement
                        pr = pr.tolist()
                        q = q.tolist()

                        for i in range(nd - 1, 0, -1):
                            if pr[i] > pr[i - 1]:
                                pr[i - 1] = pr[i]

                        inds = np.searchsorted(rc, p.recThrs, side='left')
                        try:
                            for ri, pi in enumerate(inds):
                                q[ri] = pr[pi]
                                ss[ri] = dtScoresSorted[pi]
                        except:
                            pass
                        precision[t, :, k, a, m] = np.array(q)
                        scores[t, :, k, a, m] = np.array(ss)
        self.eval = {
            'params': p,
            'counts': [T, R, K, A, M],
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'precision': precision,
            'recall': recall,
            'scores': scores,
        }
        toc = time.time()
        print('DONE (t={:0.2f}s).'.format(toc - tic))
Example #34
def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False):
    """Binarize labels in a one-vs-all fashion

    Several regression and binary classification algorithms are
    available in the scikit. A simple way to extend these algorithms
    to the multi-class classification case is to use the so-called
    one-vs-all scheme.

    This function makes it possible to compute this transformation for a
    fixed set of class labels known ahead of time.

    Parameters
    ----------
    y : array-like
        Sequence of integer labels or multilabel data to encode.

    classes : array-like of shape [n_classes]
        Uniquely holds the label for each class.

    neg_label : int (default: 0)
        Value with which negative labels must be encoded.

    pos_label : int (default: 1)
        Value with which positive labels must be encoded.

    sparse_output : boolean (default: False),
        Set to true if output binary array is desired in CSR sparse format

    Returns
    -------
    Y : numpy array or CSR matrix of shape [n_samples, n_classes]
        Shape will be [n_samples, 1] for binary problems.

    Examples
    --------
    >>> from sklearn.preprocessing import label_binarize
    >>> label_binarize([1, 6], classes=[1, 2, 4, 6])
    array([[1, 0, 0, 0],
           [0, 0, 0, 1]])

    The class ordering is preserved:

    >>> label_binarize([1, 6], classes=[1, 6, 4, 2])
    array([[1, 0, 0, 0],
           [0, 1, 0, 0]])

    Binary targets transform to a column vector

    >>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])
    array([[1],
           [0],
           [0],
           [1]])

    See also
    --------
    LabelBinarizer : class used to wrap the functionality of label_binarize and
        allow for fitting to classes independently of the transform operation
    """
    if not isinstance(y, list):
        # XXX Workaround that will be removed when list of list format is
        # dropped
        y = check_array(y, accept_sparse='csr', ensure_2d=False, dtype=None)
    else:
        if _num_samples(y) == 0:
            raise ValueError('y has 0 samples: %r' % y)
    if neg_label >= pos_label:
        raise ValueError("neg_label={0} must be strictly less than "
                         "pos_label={1}.".format(neg_label, pos_label))

    if (sparse_output and (pos_label == 0 or neg_label != 0)):
        raise ValueError("Sparse binarization is only supported with non "
                         "zero pos_label and zero neg_label, got "
                         "pos_label={0} and neg_label={1}"
                         "".format(pos_label, neg_label))

    # To account for pos_label == 0 in the dense case
    pos_switch = pos_label == 0
    if pos_switch:
        pos_label = -neg_label

    y_type = type_of_target(y)
    if 'multioutput' in y_type:
        raise ValueError("Multioutput target data is not supported with label "
                         "binarization")
    if y_type == 'unknown':
        raise ValueError("The type of target data is not known")

    n_samples = y.shape[0] if sp.issparse(y) else len(y)
    n_classes = len(classes)
    classes = np.asarray(classes)

    if y_type == "binary":
        if len(classes) == 1:
            Y = np.zeros((len(y), 1), dtype=np.int)
            Y += neg_label
            return Y
        elif len(classes) >= 3:
            y_type = "multiclass"

    sorted_class = np.sort(classes)
    if (y_type == "multilabel-indicator" and classes.size != y.shape[1]):
        raise ValueError("classes {0} missmatch with the labels {1}"
                         "found in the data".format(classes, unique_labels(y)))

    if y_type in ("binary", "multiclass"):
        y = column_or_1d(y)

        # pick out the known labels from y
        y_in_classes = in1d(y, classes)
        y_seen = y[y_in_classes]
        indices = np.searchsorted(sorted_class, y_seen)
        indptr = np.hstack((0, np.cumsum(y_in_classes)))

        data = np.empty_like(indices)
        data.fill(pos_label)
        Y = sp.csr_matrix((data, indices, indptr),
                          shape=(n_samples, n_classes))
    elif y_type == "multilabel-indicator":
        Y = sp.csr_matrix(y)
        if pos_label != 1:
            data = np.empty_like(Y.data)
            data.fill(pos_label)
            Y.data = data
    else:
        raise ValueError("%s target data is not supported with label "
                         "binarization" % y_type)

    if not sparse_output:
        Y = Y.toarray()
        Y = astype(Y, int, copy=False)

        if neg_label != 0:
            Y[Y == 0] = neg_label

        if pos_switch:
            Y[Y == pos_label] = 0
    else:
        Y.data = astype(Y.data, int, copy=False)

    # preserve label ordering
    if np.any(classes != sorted_class):
        indices = np.searchsorted(sorted_class, classes)
        Y = Y[:, indices]

    if y_type == "binary":
        if sparse_output:
            Y = Y.getcol(-1)
        else:
            Y = Y[:, -1].reshape((-1, 1))

    return Y
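A minimal usage sketch of the neg_label and sparse_output paths described above, assuming scikit-learn and SciPy are installed; the inputs are made up for illustration.

from scipy import sparse as sp
from sklearn.preprocessing import label_binarize

# negative entries encoded as -1 instead of the default 0
Y_dense = label_binarize([1, 6], classes=[1, 2, 4, 6], neg_label=-1)

# CSR output; note the checks above require pos_label != 0 and neg_label == 0 here
Y_sparse = label_binarize([1, 6], classes=[1, 2, 4, 6], sparse_output=True)
assert sp.issparse(Y_sparse)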
Example #35
0
    def process(self, **kwargs):
        """Process module."""
        kwargs = self.prepare_input(self.key('luminosities'), **kwargs)
        self._luminosities = kwargs[self.key('luminosities')]
        self._bands = kwargs['all_bands']
        self._band_indices = kwargs['all_band_indices']
        self._frequencies = kwargs['all_frequencies']
        self._radius_phot = np.array(kwargs[self.key('radiusphot')])
        self._temperature_phot = np.array(kwargs[self.key('temperaturephot')])
        self._cutoff_wavelength = kwargs[self.key('cutoff_wavelength')]
        self._times = np.array(kwargs['rest_times'])
        xc = self.X_CONST  # noqa: F841
        fc = self.FLUX_CONST
        cc = self.C_CONST
        ac = ANG_CGS
        cwave_ac = self._cutoff_wavelength * ac
        cwave_ac2 = cwave_ac * cwave_ac
        cwave_ac3 = cwave_ac2 * cwave_ac  # noqa: F841
        zp1 = 1.0 + kwargs[self.key('redshift')]

        lt = len(self._times)

        seds = np.empty(lt, dtype=object)
        rp2 = self._radius_phot**2
        tp = self._temperature_phot

        evaled = False
        for li, lum in enumerate(self._luminosities):
            bi = self._band_indices[li]
            # tpi = tp[li]
            # rp2i = rp2[li]
            if lum == 0.0:
                seds[li] = np.zeros(
                    len(self._sample_wavelengths[bi]) if bi >= 0 else 1)
                continue
            if bi >= 0:
                rest_wavs = self._sample_wavelengths[bi] * ac / zp1
            else:
                rest_wavs = np.array([cc / (self._frequencies[li] * zp1)])

            # Apply absorption to SED only bluewards of cutoff wavelength
            ab = rest_wavs < cwave_ac  # noqa: F841
            tpi = tp[li]  # noqa: F841
            rp2i = rp2[li]  # noqa: F841

            if not evaled:
                # Absorbed blackbody: 0% transmission at 0 Angstroms, 100% at
                # >3000 Angstroms.
                sed = ne.evaluate(
                    "where(ab, fc * (rp2i / cwave_ac / "
                    "rest_wavs ** 4) / expm1(xc / rest_wavs / tpi), "
                    "fc * (rp2i / rest_wavs ** 5) / "
                    "expm1(xc / rest_wavs / tpi))")
                evaled = True
            else:
                sed = ne.re_evaluate()

            sed[np.isnan(sed)] = 0.0
            seds[li] = sed

        uniq_times = np.unique(self._times)
        tsort = np.argsort(self._times)
        uniq_is = np.searchsorted(self._times, uniq_times, sorter=tsort)
        lu = len(uniq_times)

        norms = self._luminosities[uniq_is] / (fc / ac * rp2[uniq_is] *
                                               tp[uniq_is])

        rp2 = rp2[uniq_is].reshape(lu, 1)
        tp = tp[uniq_is].reshape(lu, 1)
        tp2 = tp * tp
        tp3 = tp2 * tp  # noqa: F841
        nxcs = self._nxcs  # noqa: F841

        f_blue_reds = ne.evaluate(
            "sum((exp(-nxcs / (cwave_ac * tp)) * ("
            "nxcs ** 2 + 2 * ("
            "nxcs * cwave_ac * tp + cwave_ac2 * tp2)) / ("
            "nxcs ** 3 * cwave_ac3)) + "
            "(6 * tp3 - exp(-nxcs / (cwave_ac * tp)) * ("
            "nxcs ** 3 + 3 * nxcs ** 2 * cwave_ac * tp + 6 * ("
            "nxcs * cwave_ac2 * tp2 + cwave_ac3 *"
            "tp3)) / cwave_ac3) / (nxcs ** 4), 1)")

        norms /= f_blue_reds

        # Apply renormalisation
        seds *= norms[np.searchsorted(uniq_times, self._times)]

        seds = self.add_to_existing_seds(seds, **kwargs)

        return {
            'sample_wavelengths': self._sample_wavelengths,
            self.key('seds'): seds
        }
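The renormalisation step above relies on np.searchsorted to map each (possibly repeated) observation time to its row among the unique times. A small self-contained sketch of that indexing pattern, with hypothetical numbers:

import numpy as np

times = np.array([1.0, 1.0, 2.5, 2.5, 4.0])   # rest-frame times, already sorted
uniq_times = np.unique(times)                  # [1.0, 2.5, 4.0]
norms = np.array([0.1, 0.2, 0.3])              # one normalisation per unique time
per_point = norms[np.searchsorted(uniq_times, times)]
# per_point -> [0.1, 0.1, 0.2, 0.2, 0.3]: every observation picks up the
# normalisation computed for its epoch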
Example #36
0
def demodata_infr(**kwargs):
    """
    kwargs = {}

    CommandLine:
        python -m wbia.algo.graph.demo demodata_infr --show
        python -m wbia.algo.graph.demo demodata_infr --num_pccs=25
        python -m wbia.algo.graph.demo demodata_infr --profile --num_pccs=100

    Ignore:
        >>> from wbia.algo.graph.demo import *  # NOQA
        >>> from wbia.algo.graph import demo
        >>> import networkx as nx
        >>> kwargs = dict(num_pccs=6, p_incon=.5, size_std=2)
        >>> kwargs = ut.argparse_dict(kwargs)
        >>> infr = demo.demodata_infr(**kwargs)
        >>> pccs = list(infr.positive_components())
        >>> assert len(pccs) == kwargs['num_pccs']
        >>> nonfull_pccs = [cc for cc in pccs if len(cc) > 1 and nx.is_empty(nx.complement(infr.pos_graph.subgraph(cc)))]
        >>> expected_n_incon = len(nonfull_pccs) * kwargs['p_incon']
        >>> n_incon = len(list(infr.inconsistent_components()))
        >>> # TODO can test that we our sample num incon agrees with pop mean
        >>> #sample_mean = n_incon / len(nonfull_pccs)
        >>> #pop_mean = kwargs['p_incon']
        >>> print('status = ' + ut.repr4(infr.status(extended=True)))
        >>> ut.quit_if_noshow()
        >>> infr.show(pickable=True, groupby='name_label')
        >>> ut.show_if_requested()

    Ignore:
        kwargs = {
            'ccs': [[1, 2, 3], [4, 5]]
        }
    """
    import networkx as nx
    import vtool as vt
    from wbia.algo.graph import nx_utils

    def kwalias(*args):
        params = args[0:-1]
        default = args[-1]
        for key in params:
            if key in kwargs:
                return kwargs[key]
        return default

    num_pccs = kwalias('num_pccs', 16)
    size_mean = kwalias('pcc_size_mean', 'pcc_size', 'size', 5)
    size_std = kwalias('pcc_size_std', 'size_std', 0)
    # p_pcc_incon = kwargs.get('p_incon', .1)
    p_pcc_incon = kwargs.get('p_incon', 0)
    p_pcc_incomp = kwargs.get('p_incomp', 0)
    pcc_sizes = kwalias('pcc_sizes', None)

    pos_redun = kwalias('pos_redun', [1, 2, 3])
    pos_redun = ut.ensure_iterable(pos_redun)

    # number of maximum inconsistent edges per pcc
    max_n_incon = kwargs.get('n_incon', 3)

    rng = np.random.RandomState(0)
    counter = 1

    if pcc_sizes is None:
        pcc_sizes = [
            int(randn(size_mean, size_std, rng=rng, a_min=1))
            for _ in range(num_pccs)
        ]
    else:
        num_pccs = len(pcc_sizes)

    if 'ccs' in kwargs:
        # Overwrites other options
        pcc_sizes = list(map(len, kwargs['ccs']))
        num_pccs = len(pcc_sizes)
        size_mean = None
        size_std = 0

    new_ccs = []
    pcc_iter = list(enumerate(pcc_sizes))
    pcc_iter = ut.ProgIter(pcc_iter,
                           enabled=num_pccs > 20,
                           label='make pos-demo')
    for i, size in pcc_iter:
        p = 0.1
        want_connectivity = rng.choice(pos_redun)
        want_connectivity = min(size - 1, want_connectivity)

        # Create basic graph of positive edges with desired connectivity
        g = nx_utils.random_k_edge_connected_graph(size,
                                                   k=want_connectivity,
                                                   p=p,
                                                   rng=rng)
        nx.set_edge_attributes(g, name='evidence_decision', values=POSTV)
        nx.set_edge_attributes(g, name='truth', values=POSTV)
        # nx.set_node_attributes(g, name='orig_name_label', values=i)
        assert nx.is_connected(g)

        # Relabel graph with non-conflicting names
        if 'ccs' in kwargs:
            g = nx.relabel_nodes(g, dict(enumerate(kwargs['ccs'][i])))
        else:
            # Make sure nodes do not conflict with others
            g = nx.relabel_nodes(
                g, dict(enumerate(range(counter,
                                        len(g) + counter + 1))))
            counter += len(g)

        # The probability that the whole PCC is inconsistent is `p_incon`,
        # i.e. 1 - P(all complement edges consistent), which means
        # p(edge is inconsistent) = 1 - (1 - p_incon) ** (1 / N)
        complement_edges = ut.estarmap(nx_utils.e_,
                                       nx_utils.complement_edges(g))
        if len(complement_edges) > 0:
            # compute probability that any particular edge is inconsistent
            # to achieve probability the PCC is inconsistent
            p_edge_inconn = 1 - (1 - p_pcc_incon)**(1 / len(complement_edges))
            p_edge_unrev = 0.1
            p_edge_notcomp = 1 - (1 - p_pcc_incomp)**(1 /
                                                      len(complement_edges))
            probs = np.array([p_edge_inconn, p_edge_unrev, p_edge_notcomp])
            # if the total probability is greater than 1 the parameters
            # are invalid, so we renormalize to "fix" it.
            # if probs.sum() > 1:
            #     warnings.warn('probabilities sum to more than 1')
            #     probs = probs / probs.sum()
            pcumsum = probs.cumsum()
            # Determine which mutually exclusive state each complement edge is in
            # logger.info('pcumsum = %r' % (pcumsum,))
            states = np.searchsorted(pcumsum, rng.rand(len(complement_edges)))

            incon_idxs = np.where(states == 0)[0]
            if len(incon_idxs) > max_n_incon:
                logger.info('max_n_incon = %r' % (max_n_incon, ))
                chosen = rng.choice(incon_idxs, max_n_incon, replace=False)
                states[np.setdiff1d(incon_idxs, chosen)] = len(probs)

            grouped_edges = ut.group_items(complement_edges, states)
            for state, edges in grouped_edges.items():
                truth = POSTV
                if state == 0:
                    # Add in inconsistent edges
                    evidence_decision = NEGTV
                    # TODO: truth could be INCMP or POSTV
                    # new_edges.append((u, v, {'evidence_decision': NEGTV}))
                elif state == 1:
                    evidence_decision = UNREV
                    # TODO: truth could be INCMP or POSTV
                    # new_edges.append((u, v, {'evidence_decision': UNREV}))
                elif state == 2:
                    evidence_decision = INCMP
                    truth = INCMP
                else:
                    continue
                # Add in candidate edges
                attrs = {
                    'evidence_decision': evidence_decision,
                    'truth': truth
                }
                for (u, v) in edges:
                    g.add_edge(u, v, **attrs)
        new_ccs.append(g)
        # (list(g.nodes()), new_edges))

    pos_g = nx.union_all(new_ccs)
    assert len(new_ccs) == len(list(nx.connected_components(pos_g)))
    assert num_pccs == len(new_ccs)

    # Add edges between the PCCS
    neg_edges = []

    if not kwalias('ignore_pair', False):
        logger.info('making pairs')

        pair_attrs_lookup = {
            0: {
                'evidence_decision': NEGTV,
                'truth': NEGTV
            },
            1: {
                'evidence_decision': INCMP,
                'truth': INCMP
            },
            2: {
                'evidence_decision': UNREV,
                'truth': NEGTV
            },  # could be incomp or neg
        }

        # These are the probabilities that one edge has this state
        p_pair_neg = kwalias('p_pair_neg', 0.4)
        p_pair_incmp = kwalias('p_pair_incmp', 0.2)
        p_pair_unrev = kwalias('p_pair_unrev', 0)

        # p_pair_neg = 1
        cc_combos = ((list(g1.nodes()), list(g2.nodes()))
                     for (g1, g2) in it.combinations(new_ccs, 2))
        valid_cc_combos = [(cc1, cc2) for cc1, cc2 in cc_combos
                           if len(cc1) and len(cc2)]
        for cc1, cc2 in ut.ProgIter(valid_cc_combos, label='make neg-demo'):
            possible_edges = ut.estarmap(nx_utils.e_, it.product(cc1, cc2))
            # probability that any edge between these PCCs is negative
            n_edges = len(possible_edges)
            p_edge_neg = 1 - (1 - p_pair_neg)**(1 / n_edges)
            p_edge_incmp = 1 - (1 - p_pair_incmp)**(1 / n_edges)
            p_edge_unrev = 1 - (1 - p_pair_unrev)**(1 / n_edges)

            # Create event space with sizes proportional to probabilities
            pcumsum = np.cumsum([p_edge_neg, p_edge_incmp, p_edge_unrev])
            # Roll dice for each of the edge to see which state it lands on
            possible_pstate = rng.rand(len(possible_edges))
            states = np.searchsorted(pcumsum, possible_pstate)

            flags = states < len(pcumsum)
            stateful_states = states.compress(flags)
            stateful_edges = ut.compress(possible_edges, flags)

            unique_states, groupxs_list = vt.group_indices(stateful_states)
            for state, groupxs in zip(unique_states, groupxs_list):
                # logger.info('state = %r' % (state,))
                # Add in candidate edges
                edges = ut.take(stateful_edges, groupxs)
                attrs = pair_attrs_lookup[state]
                for (u, v) in edges:
                    neg_edges.append((u, v, attrs))
        logger.info('Made {} neg_edges between PCCS'.format(len(neg_edges)))
    else:
        logger.info('ignoring pairs')

    import wbia

    G = wbia.AnnotInference._graph_cls()
    G.add_nodes_from(pos_g.nodes(data=True))
    G.add_edges_from(pos_g.edges(data=True))
    G.add_edges_from(neg_edges)
    infr = wbia.AnnotInference.from_netx(G, infer=kwargs.get('infer', True))
    infr.verbose = 3

    infr.relabel_using_reviews(rectify=False)

    # fontname = 'Ubuntu'
    fontsize = 12
    fontname = 'sans'
    splines = 'spline'
    # splines = 'ortho'
    # splines = 'line'
    infr.set_node_attrs('shape', 'circle')
    infr.graph.graph['ignore_labels'] = True
    infr.graph.graph['dark_background'] = False
    infr.graph.graph['fontname'] = fontname
    infr.graph.graph['fontsize'] = fontsize
    infr.graph.graph['splines'] = splines
    infr.set_node_attrs('width', 29)
    infr.set_node_attrs('height', 29)
    infr.set_node_attrs('fontsize', fontsize)
    infr.set_node_attrs('fontname', fontname)
    infr.set_node_attrs('fixed_size', True)

    # Set synthetic ground-truth attributes for testing
    # infr.apply_edge_truth()
    infr.edge_truth = infr.get_edge_attrs('truth')
    # Make synthetic verif
    infr.dummy_verif = DummyVerif(infr)
    infr.verifiers = {}
    infr.verifiers['match_state'] = infr.dummy_verif
    infr.demokw = kwargs
    return infr
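The edge-state assignment above is a cumulative-probability trick: mutually exclusive states get adjacent intervals of [0, 1) and np.searchsorted bins a uniform draw into them. A minimal sketch with hypothetical probabilities:

import numpy as np

rng = np.random.RandomState(0)
probs = np.array([0.2, 0.3, 0.1])     # P(state 0), P(state 1), P(state 2)
pcumsum = probs.cumsum()              # [0.2, 0.5, 0.6]
states = np.searchsorted(pcumsum, rng.rand(1000))
# state 0 on ~20% of draws, state 1 on ~30%, state 2 on ~10%; index 3
# (== len(probs)) soaks up the remaining ~40%, and such edges are simply
# skipped in the demo above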
Example #37
0
def compute_sample_weight(class_weight, y, indices=None):
    """Estimate sample weights by class for unbalanced datasets.

    Parameters
    ----------
    class_weight : dict, list of dicts, "balanced", or None, optional
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one. For
        multi-output problems, a list of dicts can be provided in the same
        order as the columns of y.

        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data:
        ``n_samples / (n_classes * np.bincount(y))``.

        For multi-output, the weights of each column of y will be multiplied.

    y : array-like, shape = [n_samples] or [n_samples, n_outputs]
        Array of original class labels per sample.

    indices : array-like, shape (n_subsample,), or None
        Array of indices to be used in a subsample. Can be of length less than
        n_samples in the case of a subsample, or equal to n_samples in the
        case of a bootstrap subsample with repeated indices. If None, the
        sample weight will be calculated over the full sample. Only "balanced" is
        supported for class_weight if this is provided.

    Returns
    -------
    sample_weight_vect : ndarray, shape (n_samples,)
        Array with sample weights as applied to the original y
    """

    y = np.atleast_1d(y)
    if y.ndim == 1:
        y = np.reshape(y, (-1, 1))
    n_outputs = y.shape[1]

    if isinstance(class_weight, six.string_types):
        if class_weight not in ['balanced', 'auto']:
            raise ValueError('The only valid preset for class_weight is '
                             '"balanced". Given "%s".' % class_weight)
    elif (indices is not None
          and not isinstance(class_weight, six.string_types)):
        raise ValueError('The only valid class_weight for subsampling is '
                         '"balanced". Given "%s".' % class_weight)
    elif n_outputs > 1:
        if (not hasattr(class_weight, "__iter__")
                or isinstance(class_weight, dict)):
            raise ValueError("For multi-output, class_weight should be a "
                             "list of dicts, or a valid string.")
        if len(class_weight) != n_outputs:
            raise ValueError("For multi-output, number of elements in "
                             "class_weight should match number of outputs.")

    expanded_class_weight = []
    for k in range(n_outputs):

        y_full = y[:, k]
        classes_full = np.unique(y_full)
        classes_missing = None

        if class_weight in ['balanced', 'auto'] or n_outputs == 1:
            class_weight_k = class_weight
        else:
            class_weight_k = class_weight[k]

        if indices is not None:
            # Get class weights for the subsample, covering all classes in
            # case some labels that were present in the original data are
            # missing from the sample.
            y_subsample = y[indices, k]
            classes_subsample = np.unique(y_subsample)

            weight_k = np.choose(np.searchsorted(classes_subsample,
                                                 classes_full),
                                 compute_class_weight(class_weight_k,
                                                      classes_subsample,
                                                      y_subsample),
                                 mode='clip')

            classes_missing = set(classes_full) - set(classes_subsample)
        else:
            weight_k = compute_class_weight(class_weight_k, classes_full,
                                            y_full)

        weight_k = weight_k[np.searchsorted(classes_full, y_full)]

        if classes_missing:
            # Make missing classes' weight zero
            weight_k[in1d(y_full, list(classes_missing))] = 0.

        expanded_class_weight.append(weight_k)

    expanded_class_weight = np.prod(expanded_class_weight,
                                    axis=0,
                                    dtype=np.float64)

    return expanded_class_weight
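A short usage sketch of the "balanced" mode described in the docstring, assuming scikit-learn's public helper; the labels are made up:

import numpy as np
from sklearn.utils.class_weight import compute_sample_weight

y = np.array([0, 0, 0, 1])                 # imbalanced binary labels
w = compute_sample_weight('balanced', y)
# n_samples / (n_classes * bincount) -> [2/3, 2/3, 2/3, 2.0], so each class
# contributes the same total weight (3 * 2/3 == 1 * 2.0)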
Example #38
0
    def __init__(self, ax, *args, **kwargs):
        global data, x, dark, incident
        global IntTime, Averages
        global xmin, xmax, ymin, ymax
        global AbMode, LEDdutycycle, LEDfrequency, LEDpin, LEDstate
        global monitorwave, monitorindex, monitor

        x = spec.wavelengths()
        # Integration time set above
        Averages = 1  #set default averages to a reasonable value
        dark = np.zeros(len(x))
        incident = np.ones(
            len(x)
        )  #dummy values to prevent error in Absorbance when no dark recorded
        LEDpin = 18  #BCM pin 18 is Physical pin 12 on the RPi
        LEDfrequency = 2000  # 2000 Hz is suitable for an integration time of 2 ms or longer
        LEDdutycycle = 50000  #50000 is 5%, scale is zero to 1 million, 0 is off
        LEDstate = 0  # LED off initially
        AbMode = 0  # initial mode is raw intensity
        self.ax = ax
        self.x = x
        self.xmin = xmin
        self.xmax = xmax
        self.ymin = ymin
        self.ymax = ymax
        self.data = data
        self.line = Line2D(self.x, self.data, color='red')
        self.ax.add_line(self.line)
        self.ax.set_ylim(ymin * 0.8, ymax * 1.1)
        self.ax.set_xlim(self.xmin, self.xmax)
        monitorwave = np.median(
            x)  #set monitor wavelength to middle of hardware range

        tk.Tk.__init__(self, *args, **kwargs)
        # tk.Tk.iconbitmap(self, default="clienticon.ico")  set window icon
        tk.Tk.wm_title(self, "Ocean Optics Spectrometer Control")
        container = tk.Frame(self)
        container.pack(side="top", fill="both", expand=True)
        container.grid_rowconfigure(0, weight=1)
        container.grid_columnconfigure(0, weight=1)

        label = tk.Label(self, text="Spectrometer on a Pi", font=LARGE_FONT)
        label.pack(pady=10, padx=10)

        self.frame1 = tk.Frame(self)
        self.frame1.pack(side='left', anchor=tk.N)
        labelint = tk.Label(self.frame1,
                            text='Integration Time (ms)',
                            relief='ridge')
        labelint.pack(side='top', pady=2)
        labelavg = tk.Label(self.frame1,
                            text='# of spectra to average',
                            relief='ridge',
                            width='17',
                            wraplength='100')
        labelavg.pack(side='top', pady=1)
        labelxmin = tk.Label(self.frame1,
                             text='Minimum wavelength',
                             relief='ridge')
        labelxmin.pack(side='top', pady=2)
        labelxmax = tk.Label(self.frame1,
                             text='Maximum wavelength',
                             relief='ridge')
        labelxmax.pack(side='top', pady=2)
        self.button_dark = tk.Button(self.frame1,
                                     text='Measure Dark',
                                     background='light grey')
        self.button_dark.pack(side='top', pady=2)
        self.button_dark.bind('<ButtonRelease-1>', self.getdark)
        self.buttonAbMode = tk.Button(self.frame1,
                                      text='Absorbance Mode (off)',
                                      background='light grey')
        self.buttonAbMode.pack(side='top', pady=1)
        self.buttonAbMode.bind('<ButtonRelease-1>', self.AbMode)

        monitorindex = np.searchsorted(x, monitorwave, side='left')
        monitor = np.round(self.data[monitorindex], decimals=3)
        self.text = self.ax.text(0.9,
                                 0.9,
                                 monitor,
                                 transform=ax.transAxes,
                                 fontsize=14)
        self.ax.axvline(x=monitorwave, lw=2, color='blue', alpha=0.5)

        self.labelmonitor = tk.Label(self.frame1,
                                     text='Wavelength to monitor (nm)',
                                     font=LARGE_FONT)
        self.labelmonitor.pack(side='top')
        self.entrymonitor = tk.Entry(self.frame1, width='7')
        self.entrymonitor.pack(side='top', pady=1, anchor=tk.N)
        self.entrymonitor.insert(0, np.round(x[monitorindex], decimals=2))
        self.entrymonitor.bind('<Return>', self.entrymonitor_return)
        self.labelmonitor2 = tk.Label(
            self.frame1, text="press <Enter> to set new wavelength")
        self.labelmonitor2.pack(side='top')
        self.button_reset_y = tk.Button(self.frame1,
                                        text='Reset Y axis scale',
                                        background='light blue')
        self.button_reset_y.pack(side='top', pady=10)
        self.button_reset_y.bind('<ButtonRelease-1>', self.reset_y)
        self.buttonLED = tk.Button(self.frame1,
                                   text='LED on / off',
                                   background='light grey')
        self.buttonLED.pack(side='top', pady=1)
        self.buttonLED.bind('<ButtonRelease-1>', self.LEDstate)

        self.labelLED = tk.Label(self.frame1, text="LED power (1 to 100%)")
        self.labelLED.pack(side='top', pady=1, anchor=tk.N)
        self.entryLED = tk.Entry(self.frame1, width='5')
        self.entryLED.pack(side='top', pady=1, anchor=tk.N)
        self.entryLED.insert(0, LEDdutycycle / 10000)
        self.entryLED.bind('<Return>', self.entryLED_return)

        self.frame2 = tk.Frame(self)
        self.frame2.pack(side='left', anchor=tk.N)
        self.entryint = tk.Entry(self.frame2, width='6')
        self.entryint.pack(side='top', pady=1, anchor=tk.N)
        self.entryint.insert(0, IntTime / 1000)
        self.entryint.bind('<Return>', self.EntryInt_return)
        self.entryavg = tk.Entry(self.frame2, width='4')
        self.entryavg.pack(side='top', pady=5)
        self.entryavg.insert(0, Averages)
        self.entryavg.bind('<Return>', self.EntryAvg_return)
        self.entryxmin = tk.Entry(self.frame2, width='7')
        self.entryxmin.pack(side='top', pady=2)
        self.entryxmin.insert(0, xmin)
        self.entryxmin.bind('<Return>', self.Entryxmin_return)
        self.entryxmax = tk.Entry(self.frame2, width='7')
        self.entryxmax.pack(side='top', pady=2)
        self.entryxmax.insert(0, xmax)
        self.entryxmax.bind('<Return>', self.Entryxmax_return)
        self.button_incident = tk.Button(self.frame2,
                                         text='Measure 100% T',
                                         background='light grey')
        self.button_incident.pack(side='top', pady=2)
        self.button_incident.bind('<ButtonRelease-1>', self.getincident)

        button_quit = ttk.Button(self, text='Quit')
        button_quit.pack(side='right', anchor=tk.N)
        button_quit.bind('<ButtonRelease-1>', self.ButtonQuit)

        ax.set_xlabel('Wavelength (nm)')
        ax.set_ylabel('Counts')

        canvas = FigureCanvasTkAgg(fig, self)
        canvas.get_tk_widget().pack(side=tk.BOTTOM, fill=tk.BOTH, expand=True)
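The monitor readout above uses np.searchsorted as a nearest-pixel lookup on the (sorted) wavelength axis. A tiny standalone sketch with a made-up axis:

import numpy as np

wavelengths = np.linspace(340.0, 1020.0, 2048)   # hypothetical spectrometer axis (nm)
monitorwave = 656.3
monitorindex = np.searchsorted(wavelengths, monitorwave, side='left')
# wavelengths[monitorindex] is the first pixel at or above the requested wavelength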
Example #39
0
 def eid_to_element_node_index(self, eids):
     ind = np.searchsorted(eids, self.element)
     return ind
Example #40
0
def _interpolate_height_and_temperature_from_pressure(imager_obj,
                                                      level,
                                                      list_of_levels=None):
    """ Function to find height att pressure level (level)
    from segment_nwp, pressure and height vectors.
    High means high in pressure. The level closest to ground i hi, and lo is at lower
    pressure further up in atmosphere.
    """
    if hasattr(imager_obj, "nwp_height") and imager_obj.nwp_height is not None:
        values_h = imager_obj.nwp_height
        pressure_v = imager_obj.nwp_pressure
        surface_h = imager_obj.nwp_surface_h
        psur = imager_obj.nwp_psur
    elif hasattr(imager_obj, "segment_nwp_geoheight"
                 ) and imager_obj.segment_nwp_geoheight is not None:
        values_h = imager_obj.segment_nwp_geoheight
        pressure_v = imager_obj.segment_nwp_pressure
        surface_h = imager_obj.segment_nwp_surfaceGeoHeight
        psur = imager_obj.segment_nwp_surfacePressure
    else:
        return None
    # import pdb
    # pdb.set_trace()
    nlev = pressure_v.shape[1]
    npix = pressure_v.shape[0]
    k = np.arange(npix)
    if list_of_levels is None:
        higher_index = np.array([
            nlev - 1 - np.searchsorted(pressure_v[ind, :],
                                       level,
                                       side='right',
                                       sorter=range(nlev - 1, -1, -1))
            for ind in range(npix)
        ])
    else:
        higher_index = np.array([
            nlev - 1 - np.searchsorted(pressure_v[ind, :],
                                       list_of_levels[ind],
                                       side='right',
                                       sorter=range(nlev - 1, -1, -1))
            for ind in range(npix)
        ])
        level = list_of_levels
    higher_index[higher_index >= (nlev - 1)] = nlev - 2
    lower_index = higher_index + 1
    # update "lo" where level is between surface and first level in array
    below_level_1 = level > pressure_v[:, 0]
    lower_index[below_level_1] = 0
    # get pressure and height for layer below and above level
    hi = pressure_v[k, higher_index]
    lo = pressure_v[k, lower_index]
    height_hi_ = values_h[k, higher_index] * 1.0
    height_lo_ = values_h[k, lower_index] * 1.0
    # update "hi" where level is between surface and first level in array
    hi[below_level_1] = psur[below_level_1]
    height_hi_[below_level_1] = surface_h[below_level_1]
    # log pressures
    hi = np.log(hi)
    lo = np.log(lo)
    level = np.log(level)
    # interpolate
    out_h = height_hi_ - (hi - level) * (height_hi_ - height_lo_) / (hi - lo)
    return out_h
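The interpolation at the end is linear in log-pressure. A worked single-pixel sketch of that formula, with invented pressures and heights:

import numpy as np

# bracketing model levels around 850 hPa ("hi" is closer to the ground)
p_hi, p_lo = 900.0, 800.0        # hPa
h_hi, h_lo = 988.0, 1949.0       # metres
level = 850.0

hi, lo, lev = np.log(p_hi), np.log(p_lo), np.log(level)
out_h = h_hi - (hi - lev) * (h_hi - h_lo) / (hi - lo)
# out_h sits slightly below the midpoint of h_hi and h_lo, because the
# interpolation weight is computed in log-pressure rather than pressure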
Example #41
0
def _jprimes(x, i, x_bounds=None):
    """
    Helper function to return the j' indices for the master curve fit

    This function is a helper function for :py:func:`quality`. It is not
    supposed to be called directly.

    Parameters
    ----------
    x : mapping to ndarrays
        The x values.

    i : int
        The row index (finite size index)

    x_bounds : 2-tuple, optional
        bounds on x values

    Returns
    -------
    ret : mapping to ndarrays
        Has the same keys and shape as `x`.
        Its element ``ret[i'][j]`` is the j' such that :math:`x_{i'j'} \leq
        x_{ij} < x_{i'(j'+1)}`.
        If no such j' exists, the element is np.nan.
        Convert the element to int to use as an index.
    """

    j_primes = - np.ones_like(x)

    try:
        x_masked = ma.masked_outside(x, x_bounds[0], x_bounds[1])
    except (TypeError, IndexError):
        x_masked = ma.asanyarray(x)

    k, n = x.shape

    # indices of lower and upper bounds
    edges = ma.notmasked_edges(x_masked, axis=1)
    x_lower = np.zeros(k, dtype=int)
    x_upper = np.zeros(k, dtype=int)
    x_lower[edges[0][0]] = edges[0][-1]
    x_upper[edges[-1][0]] = edges[-1][-1]

    for i_prime in range(k):
        if i_prime == i:
            j_primes[i_prime][:] = np.nan
            continue

        jprimes = np.searchsorted(
            x[i_prime], x[i], side='right'
        ).astype(float) - 1
        jprimes[
            np.logical_or(
                jprimes < x_lower[i_prime],
                jprimes >= x_upper[i_prime]
            )
        ] = np.nan
        j_primes[i_prime][:] = jprimes

    return j_primes
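The core of _jprimes is a bracketing search: for each x value of row i, find the j' in row i' with x_{i'j'} <= x < x_{i'(j'+1)}. A small sketch of that one step, with made-up rows:

import numpy as np

row_i_prime = np.array([0.0, 1.0, 2.0, 3.0])   # sorted x values of another size
x_i = np.array([0.5, 2.0, 3.5])                # x values of row i
jp = np.searchsorted(row_i_prime, x_i, side='right').astype(float) - 1
# jp -> [0., 2., 3.]; the last entry has no upper neighbour, so the function
# above would mask it (set it to NaN) via the x_lower/x_upper bounds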
Example #42
0
def streamplot(axes, x, y, u, v, density=1, linewidth=None, color=None,
               cmap=None, norm=None, arrowsize=1, arrowstyle='-|>',
               minlength=0.1, transform=None, zorder=None, start_points=None,
               maxlength=4.0, integration_direction='both'):
    """Draws streamlines of a vector flow.

    *x*, *y* : 1d arrays
        an *evenly spaced* grid.
    *u*, *v* : 2d arrays
        x and y-velocities. Number of rows should match length of y, and
        the number of columns should match x.
    *density* : float or 2-tuple
        Controls the closeness of streamlines. When `density = 1`, the domain
        is divided into a 30x30 grid---*density* linearly scales this grid.
        Each cell in the grid can have, at most, one traversing streamline.
        For different densities in each direction, use [density_x, density_y].
    *linewidth* : numeric or 2d array
        vary linewidth when given a 2d array with the same shape as velocities.
    *color* : matplotlib color code, or 2d array
        Streamline color. When given an array with the same shape as
        velocities, *color* values are converted to colors using *cmap*.
    *cmap* : :class:`~matplotlib.colors.Colormap`
        Colormap used to plot streamlines and arrows. Only necessary when using
        an array input for *color*.
    *norm* : :class:`~matplotlib.colors.Normalize`
        Normalize object used to scale luminance data to 0, 1. If None, stretch
        (min, max) to (0, 1). Only necessary when *color* is an array.
    *arrowsize* : float
        Factor scale arrow size.
    *arrowstyle* : str
        Arrow style specification.
        See :class:`~matplotlib.patches.FancyArrowPatch`.
    *minlength* : float
        Minimum length of streamline in axes coordinates.
    *start_points*: Nx2 array
        Coordinates of starting points for the streamlines.
        In data coordinates, the same as the ``x`` and ``y`` arrays.
    *zorder* : int
        any number
    *maxlength* : float
        Maximum length of streamline in axes coordinates.
    *integration_direction* : ['forward', 'backward', 'both']
        Integrate the streamline in forward, backward or both directions.

    Returns:

        *stream_container* : StreamplotSet
            Container object with attributes

                - lines: `matplotlib.collections.LineCollection` of streamlines

                - arrows: collection of `matplotlib.patches.FancyArrowPatch`
                  objects representing arrows half-way along stream
                  lines.

            This container will probably change in the future to allow changes
            to the colormap, alpha, etc. for both lines and arrows, but these
            changes should be backward compatible.

    """
    grid = Grid(x, y)
    mask = StreamMask(density)
    dmap = DomainMap(grid, mask)

    if zorder is None:
        zorder = mlines.Line2D.zorder

    # default to data coordinates
    if transform is None:
        transform = axes.transData

    if color is None:
        color = axes._get_lines.get_next_color()

    if linewidth is None:
        linewidth = matplotlib.rcParams['lines.linewidth']

    line_kw = {}
    arrow_kw = dict(arrowstyle=arrowstyle, mutation_scale=10 * arrowsize)

    if integration_direction not in ['both', 'forward', 'backward']:
        errstr = ("Integration direction '%s' not recognised. "
                  "Expected 'both', 'forward' or 'backward'." %
                  integration_direction)
        raise ValueError(errstr)

    if integration_direction == 'both':
        maxlength /= 2.

    use_multicolor_lines = isinstance(color, np.ndarray)
    if use_multicolor_lines:
        if color.shape != grid.shape:
            raise ValueError(
                "If 'color' is given, must have the shape of 'Grid(x,y)'")
        line_colors = []
        color = np.ma.masked_invalid(color)
    else:
        line_kw['color'] = color
        arrow_kw['color'] = color

    if isinstance(linewidth, np.ndarray):
        if linewidth.shape != grid.shape:
            raise ValueError(
                "If 'linewidth' is given, must have the shape of 'Grid(x,y)'")
        line_kw['linewidth'] = []
    else:
        line_kw['linewidth'] = linewidth
        arrow_kw['linewidth'] = linewidth

    line_kw['zorder'] = zorder
    arrow_kw['zorder'] = zorder

    ## Sanity checks.
    if u.shape != grid.shape or v.shape != grid.shape:
        raise ValueError("'u' and 'v' must be of shape 'Grid(x,y)'")

    u = np.ma.masked_invalid(u)
    v = np.ma.masked_invalid(v)

    integrate = get_integrator(u, v, dmap, minlength, maxlength,
                               integration_direction)

    trajectories = []
    if start_points is None:
        for xm, ym in _gen_starting_points(mask.shape):
            if mask[ym, xm] == 0:
                xg, yg = dmap.mask2grid(xm, ym)
                t = integrate(xg, yg)
                if t is not None:
                    trajectories.append(t)
    else:
        sp2 = np.asanyarray(start_points, dtype=float).copy()

        # Check if start_points are outside the data boundaries
        for xs, ys in sp2:
            if not (grid.x_origin <= xs <= grid.x_origin + grid.width
                    and grid.y_origin <= ys <= grid.y_origin + grid.height):
                raise ValueError("Starting point ({}, {}) outside of data "
                                 "boundaries".format(xs, ys))

        # Convert start_points from data to array coords
        # Shift the seed points from the bottom left of the data so that
        # data2grid works properly.
        sp2[:, 0] -= grid.x_origin
        sp2[:, 1] -= grid.y_origin

        for xs, ys in sp2:
            xg, yg = dmap.data2grid(xs, ys)
            t = integrate(xg, yg)
            if t is not None:
                trajectories.append(t)

    if use_multicolor_lines:
        if norm is None:
            norm = mcolors.Normalize(color.min(), color.max())
        if cmap is None:
            cmap = cm.get_cmap(matplotlib.rcParams['image.cmap'])
        else:
            cmap = cm.get_cmap(cmap)

    streamlines = []
    arrows = []
    for t in trajectories:
        tgx = np.array(t[0])
        tgy = np.array(t[1])
        # Rescale from grid-coordinates to data-coordinates.
        tx, ty = dmap.grid2data(*np.array(t))
        tx += grid.x_origin
        ty += grid.y_origin

        points = np.transpose([tx, ty]).reshape(-1, 1, 2)
        streamlines.extend(np.hstack([points[:-1], points[1:]]))

        # Add arrows half way along each trajectory.
        s = np.cumsum(np.sqrt(np.diff(tx) ** 2 + np.diff(ty) ** 2))
        n = np.searchsorted(s, s[-1] / 2.)
        arrow_tail = (tx[n], ty[n])
        arrow_head = (np.mean(tx[n:n + 2]), np.mean(ty[n:n + 2]))

        if isinstance(linewidth, np.ndarray):
            line_widths = interpgrid(linewidth, tgx, tgy)[:-1]
            line_kw['linewidth'].extend(line_widths)
            arrow_kw['linewidth'] = line_widths[n]

        if use_multicolor_lines:
            color_values = interpgrid(color, tgx, tgy)[:-1]
            line_colors.append(color_values)
            arrow_kw['color'] = cmap(norm(color_values[n]))

        p = patches.FancyArrowPatch(
            arrow_tail, arrow_head, transform=transform, **arrow_kw)
        axes.add_patch(p)
        arrows.append(p)

    lc = mcollections.LineCollection(
        streamlines, transform=transform, **line_kw)
    lc.sticky_edges.x[:] = [grid.x_origin, grid.x_origin + grid.width]
    lc.sticky_edges.y[:] = [grid.y_origin, grid.y_origin + grid.height]
    if use_multicolor_lines:
        lc.set_array(np.ma.hstack(line_colors))
        lc.set_cmap(cmap)
        lc.set_norm(norm)
    axes.add_collection(lc)
    axes.autoscale_view()

    ac = matplotlib.collections.PatchCollection(arrows)
    stream_container = StreamplotSet(lc, ac)
    return stream_container
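Arrow placement above finds the segment that crosses half of the trajectory's arc length with a cumulative sum plus np.searchsorted. A stripped-down sketch of just that step, on a toy path:

import numpy as np

tx = np.array([0.0, 1.0, 2.0, 3.0])
ty = np.array([0.0, 0.0, 1.0, 1.0])
s = np.cumsum(np.sqrt(np.diff(tx) ** 2 + np.diff(ty) ** 2))
n = np.searchsorted(s, s[-1] / 2.0)
arrow_tail = (tx[n], ty[n])
arrow_head = (np.mean(tx[n:n + 2]), np.mean(ty[n:n + 2]))
# n is the first segment whose cumulative length reaches half the total,
# so the arrow sits roughly half way along the streamline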
Example #43
0
    def run(self, dataSlice, slicePoint=None):
        """"
        Calculate the detectability of a transient with the specified lightcurve.

        Parameters
        ----------
        dataSlice : numpy.array
            Numpy structured array containing the data related to the visits provided by the slicer.
        slicePoint : dict, optional
            Dictionary containing information about the slicepoint currently active in the slicer.

        Returns
        -------
        float
            The total number of transients that could be detected.
        """
        # Total number of transients that could go off back-to-back
        if self.countMethod == 'partialLC':
            _nTransMax = np.ceil(self.surveyDuration /
                                 (self.transDuration / 365.25))
        else:
            _nTransMax = np.floor(self.surveyDuration /
                                  (self.transDuration / 365.25))
        tshifts = np.arange(self.nPhaseCheck) * self.transDuration / float(
            self.nPhaseCheck)
        nDetected = 0
        nTransMax = 0
        for tshift in tshifts:
            # Compute the total number of back-to-back transients are possible to detect
            # given the survey duration and the transient duration.
            nTransMax += _nTransMax
            if tshift != 0:
                nTransMax -= 1
            if self.surveyStart is None:
                surveyStart = dataSlice[self.mjdCol].min()
            else:
                surveyStart = self.surveyStart
            time = (dataSlice[self.mjdCol] - surveyStart +
                    tshift) % self.transDuration

            # Which lightcurve does each point belong to
            lcNumber = np.floor(
                (dataSlice[self.mjdCol] - surveyStart) / self.transDuration)

            lcMags = self.lightCurve(time, dataSlice[self.filterCol])

            # How many criteria needs to be passed
            detectThresh = 0

            # Flag points that are above the SNR limit
            detected = np.zeros(dataSlice.size, dtype=int)
            detected[np.where(
                lcMags < dataSlice[self.m5Col] + self.detectM5Plus)] += 1
            detectThresh += 1

            # If we demand points on the rise
            if self.nPrePeak > 0:
                detectThresh += 1
                ord = np.argsort(dataSlice[self.mjdCol])
                dataSlice = dataSlice[ord]
                detected = detected[ord]
                lcNumber = lcNumber[ord]
                time = time[ord]
                ulcNumber = np.unique(lcNumber)
                left = np.searchsorted(lcNumber, ulcNumber)
                right = np.searchsorted(lcNumber, ulcNumber, side='right')
                # Note here I'm using np.searchsorted to basically do a 'group by';
                # it might be clearer to use scipy.ndimage.measurements.find_objects or pandas, but
                # this numpy function is known for being efficient.
                for le, ri in zip(left, right):
                    # Number of points where there are a detection
                    good = np.where(time[le:ri] < self.peakTime)
                    nd = np.sum(detected[le:ri][good])
                    if nd >= self.nPrePeak:
                        detected[le:ri] += 1

            # Check if we need multiple points per light curve or multiple filters
            if (self.nPerLC > 1) | (self.nFilters > 1):
                # make sure things are sorted by time
                ord = np.argsort(dataSlice[self.mjdCol])
                dataSlice = dataSlice[ord]
                detected = detected[ord]
                lcNumber = lcNumber[ord]
                time = time[ord]
                ulcNumber = np.unique(lcNumber)
                left = np.searchsorted(lcNumber, ulcNumber)
                right = np.searchsorted(lcNumber, ulcNumber, side='right')
                detectThresh += self.nFilters

                for le, ri in zip(left, right):
                    points = np.where(detected[le:ri] > 0)
                    ufilters = np.unique(
                        dataSlice[self.filterCol][le:ri][points])
                    phaseSections = np.floor(time[le:ri][points] /
                                             self.transDuration * self.nPerLC)
                    for filtName in ufilters:
                        good = np.where(dataSlice[self.filterCol][le:ri]
                                        [points] == filtName)
                        if np.size(np.unique(
                                phaseSections[good])) >= self.nPerLC:
                            detected[le:ri] += 1

            # Find the unique number of light curves that passed the required number of conditions
            nDetected += np.size(
                np.unique(lcNumber[np.where(detected >= detectThresh)]))

        # Rather than keeping a single "detected" variable, maybe make a mask for each criterion, then
        # add reduce functions like reduce_singleDetect, reduce_NDetect, reduce_PerLC, reduce_perFilter.
        # For the way I'm running things now, that would speed things up.

        return float(nDetected) / nTransMax
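The left/right searchsorted pair above is a lightweight "group by" over the sorted lcNumber array. A minimal sketch of the same trick on toy labels:

import numpy as np

lcNumber = np.array([0, 0, 0, 1, 1, 2])                      # sorted group labels
ulcNumber = np.unique(lcNumber)
left = np.searchsorted(lcNumber, ulcNumber)                  # [0, 3, 5]
right = np.searchsorted(lcNumber, ulcNumber, side='right')   # [3, 5, 6]
for le, ri in zip(left, right):
    group = lcNumber[le:ri]   # one light curve's rows at a time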
Example #44
0
 def get_element_index(self, eids):
     itot = np.searchsorted(eids, self.element)
     return itot
Example #45
0
 def weighted_pick(weights):
     t = np.cumsum(weights)
     s = np.sum(weights)
     return (int(np.searchsorted(t, np.random.rand(1) * s)))
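weighted_pick is inverse-CDF sampling: cumulative weights define bins on [0, sum) and a uniform draw is binned with np.searchsorted. A quick usage sketch, which also shows that passing an array of draws vectorises the pick:

import numpy as np

weights = np.array([0.1, 0.7, 0.2])
t = np.cumsum(weights)                            # [0.1, 0.8, 1.0]
s = np.sum(weights)
picks = np.searchsorted(t, np.random.rand(10000) * s)
# index 1 is drawn ~70% of the time; a single rand(1) draw reproduces
# the scalar behaviour of weighted_pick above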
Example #46
0
    def disaggregate_poe(self, sctx, rctx, dctx, imt, iml, truncation_level,
                         n_epsilons):
        """
        Disaggregate (separate) PoE of ``iml`` in different contributions
        each coming from ``n_epsilons`` distribution bins.

        If ``truncation_level = 3``, ``n_epsilons = 3``, bin edges are
        ``-3 .. -1``, ``-1 .. +1`` and ``+1 .. +3``.

        :param n_epsilons:
            Integer number of bins to split truncated Gaussian distribution to.

        Other parameters are the same as for :meth:`get_poes`, with
        differences that ``iml`` is only one single intensity level
        and ``truncation_level`` is required to be positive.

        :returns:
            Contribution to probability of exceedance of ``iml`` coming
            from different sigma bands in a form of 1d numpy array with
            ``n_epsilons`` floats between 0 and 1.
        """
        if not truncation_level > 0:
            raise ValueError('truncation level must be positive')
        self._check_imt(imt)

        # compute mean and standard deviations
        mean, [stddev] = self.get_mean_and_stddevs(sctx, rctx, dctx, imt,
                                                   [const.StdDev.TOTAL])

        # compute iml value with respect to standard (mean=0, std=1)
        # normal distributions
        iml = self.to_distribution_values(iml)
        standard_imls = (iml - mean) / stddev

        distribution = scipy.stats.truncnorm(-truncation_level,
                                             truncation_level)
        epsilons = numpy.linspace(-truncation_level, truncation_level,
                                  n_epsilons + 1)
        # compute epsilon bins contributions
        contribution_by_bands = (distribution.cdf(epsilons[1:]) -
                                 distribution.cdf(epsilons[:-1]))

        # take the minimum epsilon larger than standard_iml
        iml_bin_indices = numpy.searchsorted(epsilons, standard_imls)

        return numpy.array([
            # take full disaggregated distribution for the case of
            # ``iml <= mean - truncation_level * stddev``
            contribution_by_bands if idx == 0 else

            # take zeros if ``iml >= mean + truncation_level * stddev``
            numpy.zeros(n_epsilons) if idx >= n_epsilons + 1 else

            # for other cases (when ``iml`` falls somewhere in the
            # histogram):
            numpy.concatenate((
                # take zeros for bins that are on the left hand side
                # from the bin ``iml`` falls into,
                numpy.zeros(idx - 1),
                # ... area of the portion of the bin containing ``iml``
                # (the portion is limited on the left hand side by
                # ``iml`` and on the right hand side by the bin edge),
                [
                    distribution.sf(standard_imls[i]) -
                    contribution_by_bands[idx:].sum()
                ],
                # ... and all bins on the right go unchanged.
                contribution_by_bands[idx:]))
            for i, idx in enumerate(iml_bin_indices)
        ])
Example #47
0
def _first_brillouin_zone(eigenvalues, eigenvectors, n_values, edge):
    """
    Return the `n_values` eigenvalues (and corresponding eigenvectors) which
    fall within the first "Brillouin zone" whose edge is `edge`.  This function
    takes care to select values from only one edge of the zone, and raises a
    `RuntimeError` if it cannot safely do so.

    The inputs `eigenvalues` and `edge` must be rounded to the desired
    precision for this function.

    Arguments:
    eigenvalues: 1D np.array of float --
        The eigenvalues to choose from.  This should be rounded to the desired
        precision (because the floating-point values will be compared directly
        to the edge value).

    eigenvectors: 2D np.array of complex --
        The eigenvectors corresponding to the eigenvalues.  The first index runs
        over the number of vectors, so `eigenvalues[i]` is the eigenvalue
        corresponding to the eigenvector `eigenvectors[i]`.  Note: this is the
        transpose of the return value of `np.linalg.eig` and family.

    n_values: int -- The number of eigenvalues to find.

    edge: float --
        The edge of the first Brillouin zone.  This should be rounded to the
        desired precision.

    Returns:
    eigenvalues: 1D np.array of float -- The selected eigenvalues (sorted).
    eigenvectors: 2D np.array of complex --
        The eigenvectors corresponding to the selected eigenvalues.  The first
        index corresponds to the index of the `eigenvalues` output.
    """
    order = eigenvalues.argsort()
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[order]
    lower = np.searchsorted(eigenvalues, -edge, side='left')
    upper = np.searchsorted(eigenvalues, edge, side='right')
    n_lower_edge = n_upper_edge = 0
    while eigenvalues[lower + n_lower_edge] == -edge:
        n_lower_edge += 1
    # Additional `-1` because `searchsorted(side='right')` gives us the index
    # after the found element.
    while eigenvalues[upper - n_upper_edge - 1] == edge:
        n_upper_edge += 1
    n_not_on_edge = (upper - n_upper_edge) - (lower + n_lower_edge)
    log_message = " ".join([
        f"Needed {n_values} eigenvalues in the first zone.",
        f"Found {n_lower_edge}, {n_not_on_edge}, {n_upper_edge} on the",
        "lower edge, centre zone, upper edge respectively.",
    ])
    _log.debug(log_message)
    if n_not_on_edge == n_values:
        lower, upper = lower + n_lower_edge, upper - n_upper_edge
    elif n_not_on_edge + n_lower_edge == n_values:
        lower, upper = lower, upper - n_upper_edge
    elif n_not_on_edge + n_upper_edge == n_values:
        lower, upper = lower + n_lower_edge, upper
    else:
        exception_message = " ".join([
            "Could not resolve the first Brillouin zone safely.",
            "You could try increasing the tolerance (decreasing the 'decimals'",
            "field), or adding a small constant term to your Hamiltonian.",
        ])
        raise RuntimeError(exception_message)
    return eigenvalues[lower:upper], eigenvectors[lower:upper]
Example #48
0
def adaptive_parzen_normal(mus,
                           prior_weight,
                           prior_mu,
                           prior_sigma,
                           LF=DEFAULT_LF):
    """
    mus - vector of component centers (a 1-D array; see the check below)
    """
    mus = np.array(mus)
    assert str(mus.dtype) != 'object'

    if mus.ndim != 1:
        raise TypeError('mus must be vector', mus)
    if len(mus) == 0:
        srtd_mus = np.asarray([prior_mu])
        sigma = np.asarray([prior_sigma])
        prior_pos = 0
    elif len(mus) == 1:
        if prior_mu < mus[0]:
            prior_pos = 0
            srtd_mus = np.asarray([prior_mu, mus[0]])
            sigma = np.asarray([prior_sigma, prior_sigma * .5])
        else:
            prior_pos = 1
            srtd_mus = np.asarray([mus[0], prior_mu])
            sigma = np.asarray([prior_sigma * .5, prior_sigma])
    elif len(mus) >= 2:

        # create new_mus, which is sorted, and in which
        # the prior has been inserted
        order = np.argsort(mus)
        prior_pos = np.searchsorted(mus[order], prior_mu)
        srtd_mus = np.zeros(len(mus) + 1)
        srtd_mus[:prior_pos] = mus[order[:prior_pos]]
        srtd_mus[prior_pos] = prior_mu
        srtd_mus[prior_pos + 1:] = mus[order[prior_pos:]]
        sigma = np.zeros_like(srtd_mus)
        sigma[1:-1] = np.maximum(srtd_mus[1:-1] - srtd_mus[0:-2],
                                 srtd_mus[2:] - srtd_mus[1:-1])
        lsigma = srtd_mus[1] - srtd_mus[0]
        usigma = srtd_mus[-1] - srtd_mus[-2]
        sigma[0] = lsigma
        sigma[-1] = usigma

    if LF and LF < len(mus):
        unsrtd_weights = linear_forgetting_weights(len(mus), LF)
        srtd_weights = np.zeros_like(srtd_mus)
        assert len(unsrtd_weights) + 1 == len(srtd_mus)
        srtd_weights[:prior_pos] = unsrtd_weights[order[:prior_pos]]
        srtd_weights[prior_pos] = prior_weight
        srtd_weights[prior_pos + 1:] = unsrtd_weights[order[prior_pos:]]

    else:
        srtd_weights = np.ones(len(srtd_mus))
        srtd_weights[prior_pos] = prior_weight

    # -- magic formula:
    maxsigma = old_div(prior_sigma, 1.0)
    minsigma = old_div(prior_sigma, min(100.0, (1.0 + len(srtd_mus))))

    sigma = np.clip(sigma, minsigma, maxsigma)

    sigma[prior_pos] = prior_sigma
    assert prior_sigma > 0
    assert maxsigma > 0
    assert minsigma > 0
    assert np.all(sigma > 0), (sigma.min(), minsigma, maxsigma)

    srtd_weights /= srtd_weights.sum()
    if 0:
        print('WEIGHTS', srtd_weights)
        print('MUS', srtd_mus)
        print('SIGMA', sigma)

    return srtd_weights, srtd_mus, sigma
Example #49
0
 def result(x):
     return np.searchsorted(selection, x, side='right') / selection.size
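result is an empirical CDF: the rank of x within the sorted selection, divided by the sample size. A tiny sketch with made-up data:

import numpy as np

selection = np.array([1.0, 2.0, 2.0, 5.0])   # must be sorted for searchsorted

def ecdf(x):
    return np.searchsorted(selection, x, side='right') / selection.size

# ecdf(0.5) -> 0.0, ecdf(2.0) -> 0.75, ecdf(5.0) -> 1.0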
Example #50
0
 def test_searchsortedOp_on_right_side(self):
     f = aesara.function([self.x, self.v],
                         searchsorted(self.x, self.v, side="right"))
     assert np.allclose(np.searchsorted(self.a, self.b, side="right"),
                        f(self.a, self.b))
Example #51
0
def process_week(config, source, week_file):
    """Process a single week file

    * Retrieve the file, extracting the photon and spacecraft info
    * Select photons near the source,
    * Determine exposure for the direction
    * Use the weight table to add weights to photon data, selecting photons with weight info
    -- in progress --
    * Use the exposure to assign an exposure to each photon.


    """

    with open(week_file, 'rb') as inp:
        week = pickle.load(inp)

    pdf = _get_photons_near_source(config, source, week)
    edf = _calculate_exposure_for_source(config, source, week)
    if config.verbose > 2:
        print(f'\n\t-->Selected {len(pdf)} photons')

    # add weights
    if pdf is None or len(pdf) < 3 or len(edf) == 0:
        return None, edf

    add_weights(config, pdf, source)

    if 'run_id' in pdf:
        # expint = np.empty(2*len(edf))
        estart = edf.start.values
        estop = edf.stop.values
        exptime = np.append(estart, estop[-1])
        expval = edf.exp.values
        expcth = edf.cos_theta.values

        # corresponding cumulative exposure -- in m^2
        cumexp = np.insert(np.cumsum(edf.exp.values / 1e4), 0, 0)
        # i = np.searchsorted(expint[0::2], MJD(pdf.iloc[0].run_id) )

        runs = pdf.groupby('run_id')
        last_run = 0
        tau = []
        time = []
        run_id = []
        for run, g in runs:
            assert run > last_run
            run_id += [run] * len(g)
            last_run = run

            # assemble MJD time from run_id and trun
            runstart = MJD(float(run))
            rtime = MJD(float(run) + g.trun * config.offset_size)
            time += list(rtime)

            # cumexp at run start
            run_cumexp = cumexp[np.searchsorted(estart, runstart)]

            # use event times in this run to interpolate table of exposure times, cumexp
            event_cumexp = np.interp(rtime, exptime, cumexp)

            # diffs, from first --> tau
            event_exp = np.diff(np.insert(event_cumexp, 0, run_cumexp))
            tau += list(event_exp)

#             # extract cos_theta at event_time? should interpolate maybe
#             cth += expcth[np.searchsorted(rtime, estart)]

        # update pdf
        pdf.loc[:, 'tau'] = np.array(tau, np.float32)
        pdf.loc[:, 'time'] = time
        pdf.drop(columns='trun', inplace=True)
        pdf.loc[:, 'run_id'] = pd.Categorical(run_id)

    else:  # zap legacy for now
        for check in 'etime event run_diff rtime run'.split():
            if check in pdf:
                if config.verbose > 2: print(f'remove {check}')
                pdf.drop(columns=check, inplace=True)

    # final attempt to do this
    pdf.loc[:, 'weight'] = pdf['weight'].astype(np.float32)

    return pdf, edf
Example #52
0
    def R_cb(self,
             x,
             bound='two-sided',
             interp='step',
             alpha_ci=0.05,
             bound_type='exp',
             dist='z'):

        if bound_type not in ['exp', 'normal']:
            raise ValueError("'bound_type' must be in ['exp', 'normal']")
        if dist not in ['t', 'z']:
            raise ValueError("'dist' must be in ['t', 'z']")

        confidence = 1. - alpha_ci

        old_err_state = np.seterr(all='ignore')

        x = np.atleast_1d(x)
        if bound in ['upper', 'lower']:
            if dist == 't':
                stat = t.ppf(1 - confidence, self.r - 1)
            else:
                stat = norm.ppf(1 - confidence, 0, 1)
            if bound == 'upper':
                stat = -stat
        elif bound == 'two-sided':
            if dist == 't':
                stat = t.ppf((1 - confidence) / 2, self.r - 1)
            else:
                stat = norm.ppf((1 - confidence) / 2, 0, 1)
            stat = np.array([-1, 1]).reshape(2, 1) * stat

        if bound_type == 'exp':
            # Exponential Greenwood confidence
            R_out = self.greenwood * 1. / (np.log(self.R)**2)
            R_out = np.log(-np.log(self.R)) - stat * np.sqrt(R_out)
            R_out = np.exp(-np.exp(R_out))
        else:
            # Normal Greenwood confidence
            R_out = self.R + np.sqrt(self.greenwood * self.R**2) * stat

        if interp == 'step':
            idx = np.searchsorted(self.x, x, side='right') - 1
            if bound == 'two-sided':
                R_out = R_out[:, idx]
                R_out = np.where(idx < 0, 1, R_out)
            else:
                R_out = R_out[idx]
                R_out = np.where(idx < 0, 1, R_out)

        else:
            if bound == 'two-sided':
                R1 = interp1d(self.x, R_out[0, :], kind=interp)(x)
                R2 = interp1d(self.x, R_out[1, :], kind=interp)(x)
                R_out = np.vstack([R1, R2])
            else:
                R_out = interp1d(self.x, R_out, kind=interp)(x)

        if R_out.ndim == 2:
            min_idx = (x < self.x.min())
            max_idx = (x > self.x.max())
            # print(R_out[0, :])
            R_out[0, :][min_idx] = 1
            R_out[0, :][max_idx] = 1
            R_out[1, :][min_idx] = 0
            R_out[1, :][max_idx] = 0
            R_out = R_out.T
            # print(R_out)
            # nan_idx = ((x < self.x.min()) | (x > self.x.max()))
            # R_out[nan_i3dx] = np.nan
        else:
            min_idx = (x < self.x.min())
            max_idx = (x > self.x.max())
            R_out[min_idx] = 0
            R_out[max_idx] = 1

        np.seterr(**old_err_state)

        return R_out
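
The interp='step' branch relies on a standard step-function lookup: np.searchsorted(self.x, x, side='right') - 1 gives the index of the last observed time not exceeding each query, and negative indices flag queries before the first event. A minimal sketch with synthetic values (x_obs and R_est stand in for the self.x and self.R attributes):

import numpy as np

# hypothetical survival estimate: R changes only at the observed times x_obs
x_obs = np.array([1.0, 2.0, 5.0])
R_est = np.array([0.9, 0.7, 0.4])

def step_lookup(x_query):
    # index of the last observed time <= query; -1 means "before the first event"
    idx = np.searchsorted(x_obs, np.atleast_1d(x_query), side='right') - 1
    return np.where(idx < 0, 1.0, R_est[idx])   # survival is 1 before any event

print(step_lookup([0.5, 1.0, 3.0, 6.0]))  # [1.  0.9 0.7 0.4]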
Example #53
0
 def split_tracklet(tracklet, inds):
     idx = sorted(set(np.searchsorted(tracklet.inds, inds)))
     inds_new = np.split(tracklet.inds, idx)
     data_new = np.split(tracklet.data, idx)
     return [Tracklet(data, inds) for data, inds in zip(data_new, inds_new)]
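
In isolation, the split logic above is np.searchsorted to turn frame indices into array offsets, followed by np.split; a small sketch on a plain sorted index array (the Tracklet class itself is not reproduced):

import numpy as np

tracklet_inds = np.array([0, 1, 2, 5, 6, 7, 10, 11])
split_at = [5, 10]

# offsets at which to cut, then split the sorted index array there
idx = sorted(set(np.searchsorted(tracklet_inds, split_at)))  # [3, 6]
pieces = np.split(tracklet_inds, idx)
print(pieces)  # [array([0, 1, 2]), array([5, 6, 7]), array([10, 11])]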
Example #54
0
    def get_reflectance(self, sun_zenith, sat_zenith, azidiff, bandname, redband=None):
        """Get the reflectance from the three sun-sat angles."""
        # Get wavelength in nm for band:
        if isinstance(bandname, float):
            LOG.warning('A wavelength is provided instead of band name - ' +
                        'disregard the relative spectral responses and assume ' +
                        'it is the effective wavelength: %f (micro meter)', bandname)
            wvl = bandname * 1000.0
        else:
            wvl = self.get_effective_wavelength(bandname)
            wvl = wvl * 1000.0

        rayl, wvl_coord, azid_coord, satz_sec_coord, sunz_sec_coord = self.get_reflectance_lut()

        # force dask arrays
        compute = False
        if HAVE_DASK and not isinstance(sun_zenith, Array):
            compute = True
            sun_zenith = from_array(sun_zenith, chunks=sun_zenith.shape)
            sat_zenith = from_array(sat_zenith, chunks=sat_zenith.shape)
            azidiff = from_array(azidiff, chunks=azidiff.shape)
            if redband is not None:
                redband = from_array(redband, chunks=redband.shape)

        clip_angle = rad2deg(arccos(1. / sunz_sec_coord.max()))
        sun_zenith = clip(sun_zenith, 0, clip_angle)
        sunzsec = 1. / cos(deg2rad(sun_zenith))
        clip_angle = rad2deg(arccos(1. / satz_sec_coord.max()))
        sat_zenith = clip(sat_zenith, 0, clip_angle)
        satzsec = 1. / cos(deg2rad(sat_zenith))
        shape = sun_zenith.shape

        if not (wvl_coord.min() < wvl < wvl_coord.max()):
            LOG.warning(
                "Effective wavelength for band %s outside 400-800 nm range!",
                str(bandname))
            LOG.info(
                "Set the rayleigh/aerosol reflectance contribution to zero!")
            if HAVE_DASK:
                chunks = sun_zenith.chunks if redband is None else redband.chunks
                res = zeros(shape, chunks=chunks)
                return res.compute() if compute else res

            return zeros(shape)

        idx = np.searchsorted(wvl_coord, wvl)
        wvl1 = wvl_coord[idx - 1]
        wvl2 = wvl_coord[idx]

        fac = (wvl2 - wvl) / (wvl2 - wvl1)
        raylwvl = fac * rayl[idx - 1, :, :, :] + (1 - fac) * rayl[idx, :, :, :]
        tic = time.time()

        smin = [sunz_sec_coord[0], azid_coord[0], satz_sec_coord[0]]
        smax = [sunz_sec_coord[-1], azid_coord[-1], satz_sec_coord[-1]]
        orders = [
            len(sunz_sec_coord), len(azid_coord), len(satz_sec_coord)]
        f_3d_grid = atleast_2d(raylwvl.ravel())

        if HAVE_DASK and isinstance(smin[0], Array):
            # compute all of these at the same time before passing to the interpolator
            # otherwise they are computed separately
            smin, smax, orders, f_3d_grid = da.compute(smin, smax, orders, f_3d_grid)
        minterp = MultilinearInterpolator(smin, smax, orders)
        minterp.set_values(f_3d_grid)

        if HAVE_DASK:
            ipn = map_blocks(self._do_interp, minterp, sunzsec, azidiff,
                             satzsec, dtype=raylwvl.dtype, chunks=azidiff.chunks)
        else:
            ipn = self._do_interp(minterp, sunzsec, azidiff, satzsec)

        LOG.debug("Time - Interpolation: {0:f}".format(time.time() - tic))

        ipn *= 100
        res = ipn
        if redband is not None:
            res = where(redband < 20., res,
                        (1 - (redband - 20) / 80) * res)

        res = clip(res, 0, 100)
        if compute:
            res = res.compute()

        return res
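
The wavelength handling above is a bracketed linear interpolation between two LUT slices: np.searchsorted finds the first grid wavelength at or above the target, and fac weights the two neighbouring slices. A reduced sketch with a hypothetical 1-D table (the real rayl table is 4-D):

import numpy as np

wvl_coord = np.array([400.0, 500.0, 600.0, 700.0])   # nm
lut = np.array([1.0, 2.0, 4.0, 8.0])                 # one value per wavelength, for brevity

wvl = 560.0
idx = np.searchsorted(wvl_coord, wvl)          # first grid point >= wvl -> 2
wvl1, wvl2 = wvl_coord[idx - 1], wvl_coord[idx]

# same weighting as above: fac -> 1 at wvl1 and 0 at wvl2
fac = (wvl2 - wvl) / (wvl2 - wvl1)
value = fac * lut[idx - 1] + (1 - fac) * lut[idx]
print(value)  # 3.2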
Example #55
0
 def set_data_at(self, ind, data):
     self.data[np.searchsorted(self.inds, ind)] = data
Example #56
0
def onlineAnalysis(config, getImgNormDistribution=False):

  ###################################
  #####  get background images  #####
  ###################################

  bkgImages = []
  reCenterImgs = []
  centerRsum = 0.
  centerCsum = 0.
  centerSumCount = 0.


  ###########################################
  #####  initialize analysis variables  #####
  ###########################################

  ###  initialize file lists  ###
  loadedFiles = []
  loadFiles = []
  queryFiles = []
  if config.doQueryFolder:
    queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles)
  for fld in config.loadFolders:
    folderName = fld["folder"] + "/*" + config.fileExtention
    diffractionFiles = glob.glob(folderName)
    bkgImgFiles = [fld["background"]]*len(diffractionFiles)
    centerRs = [fld["centerR"]]*len(diffractionFiles)
    centerCs = [fld["centerC"]]*len(diffractionFiles)
    loadFiles = loadFiles + list(zip(diffractionFiles, bkgImgFiles, centerRs, centerCs))

  while (len(loadFiles) == 0) and (len(queryFiles) == 0):
    if not config.doQueryFolder:
      raise RuntimeError("ERROR: There are no files included in the load folders!")
    else:
      queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles)
      while not len(queryFiles):
        print("There are no diffraction patterns under %s, will keep looking..." % config.queryFolder)
        time.sleep(10)
        queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles)


  ###  initialize plots  ###
  if not getImgNormDistribution:
    plt.ion()
    Qrange = np.arange(config.NradialBins+1)*config.Qmax/(config.NradialBins)
    LOmin = np.searchsorted(Qrange, [config.LineOutMinQ])[0]
    LOmax = np.searchsorted(Qrange, [config.LineOutMaxQ])[0]
    fig = plt.figure(figsize=config.plotFigSize, dpi=config.dpi)

    plotGrid = (2,6)
    axCurDP = plt.subplot2grid(plotGrid, (0,0))
    #axCurDP.set_title("Current Diffraction")
    axCurDP.get_xaxis().set_visible(False)
    axCurDP.get_yaxis().set_visible(False)
    axCurDP.set_position([0.95,0.05, 0.05, 0.95])

    axSumDP = plt.subplot2grid(plotGrid, (1,0))
    #axSumDP.set_title("Aggregate Diffraction")
    axSumDP.get_xaxis().set_visible(False)
    axSumDP.get_yaxis().set_visible(False)
    axSumDP.set_position([0.95,0.05, 0.05, 0.95])

    axCurL0 = plt.subplot2grid(plotGrid, (0,1))
    axCurL0.set(xlabel=r'Q $[\AA^{-1}]$', ylabel="Legendre 0")

    axTotCN = plt.subplot2grid(plotGrid, (1,1))
    axTotCN.set(xlabel="Time", ylabel="Total Counts")

    axAllL0 = plt.subplot2grid(plotGrid, (0,2), colspan=2)
    axAllL0.set(xlabel="Time [ps]", ylabel=r'Q $[\AA^{-1}]$')

    axLinL0 = plt.subplot2grid(plotGrid, (1,2), colspan=2)
    axLinL0.set(xlabel="Time [ps]", ylabel="Legendre 0")

    axAllL2 = plt.subplot2grid(plotGrid, (0,4), colspan=2)
    axAllL2.set(xlabel="Time [ps]", ylabel=r'Q $[\AA^{-1}]$')

    axLinL2= plt.subplot2grid(plotGrid, (1,4), colspan=2)
    axLinL2.set(xlabel="Time [ps]", ylabel="Legendre 2")

    plotCurLeg, = axCurL0.plot(Qrange[:-1], np.zeros((config.NradialBins)), "k-")
    plotL0LO,   = axLinL0.plot(Qrange[:-1], np.zeros((config.NradialBins)), "k-")
    plotL2LO,   = axLinL2.plot(Qrange[:-1], np.zeros((config.NradialBins)), "k-")


  ###  image variables  ###
  aggregateImage = np.zeros((1024,1024), np.float64)
  imageSums = []
  NsumRejected = 0


  ###  initialize legendre variables  ###
  legCoeffDict = {}
  loadImgDict = {}
  averageLegCoeffArray = np.zeros((config.Nlegendres,1,config.NradialBins))

  initializeFiles = True
  if config.loadSavedResults:
    legCoeffDict, loadedFiles, averageLegCoeffArray =\
        load_results(config.loadSavedResultsFolder, config.loadSavedResultsFileName)
    delays = np.sort(np.array(list(legCoeffDict.keys())))
    initializeFiles = False

    # initialize loading variables with first new entry
    while len(loadFiles):
      fileName,_,_,_ = loadFiles[0]
      if fileName in loadedFiles:
        del loadFiles[0]
      else:
        info = get_image_info(fileName)
        delays = np.array([info.stageDelay])
        loadImgDict[info.stageDelay] = (0, 0)
        break

  while initializeFiles and\
      (initializeFiles or (len(loadFiles) == 0) or (len(queryFiles) == 0)):
    if len(loadFiles):
      fileName,_,_,_ = loadFiles[0]
      info = get_image_info(fileName)
      delays = np.array([info.stageDelay])
      loadImgDict[info.stageDelay] = (0, 0)
      initializeFiles = False
    elif len(queryFiles):
      info = get_image_info(queryFiles[0])
      delays = np.array([info.stageDelay])
      legCoeffDict[info.stageDelay] = (0, 0)
      initializeFiles = False
    elif config.doQueryFolder:
      queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles)
    else:
      print("ERROR: Cannot run without loading files or querying folder!!!")
      sys.exit()

  for i in np.arange(len(loadFiles)):
    fileName,_,_,_ = loadFiles[-1]
    imgSum = np.sum(get_image(fileName, config.hotPixel))
    if (imgSum < config.sumMin) or (imgSum > config.sumMax):
      del loadFiles[-1]
    else:
      break


  ###  retrieving atomic diffraction  ###
  if config.normByAtomic:
    atomicDiffraction = np.fromfile(config.atomicDiffractionFile, 
        dtype=config.atomicDiffractionDataType)*1e20
    qGrid = (np.arange(config.NradialBins, dtype=np.float64) + 0.5)\
              *config.Qmax/config.NradialBins
    atomicNorm = 1./(atomicDiffraction*qGrid)


  ###  retrieve gMatrix for legendre fitting  ###
  assert ((config.roi + (1 - config.roi%2))%config.Nrebin == 0),\
      "ERROR: Cannot rebin an image with size [{}, {}] by {}, change roi!".format(
          config.roi+1, config.roi+1, config.Nrebin)
  imgRebinSize = (config.roi+1)//config.Nrebin

  gMatrixName = "gMatrix_pixels-" + str(imgRebinSize)\
                + "Nradii-" + str(config.NradialBins)\
                + "Nlegendre-" + str(config.Nlegendres) + ".dat"

  if not os.path.isfile(config.gMatrixFolder + "/" + gMatrixName):
    make_legendre_gMatrix(config.NradialBins, config.Nlegendres,
                      imgRebinSize, config.gMatrixFolder + "/" + gMatrixName)

  gMatrix = np.fromfile(config.gMatrixFolder + "/" + gMatrixName,
                          dtype=np.float64)
  gMatrix = np.reshape(gMatrix,
                (imgRebinSize**2, config.NradialBins*config.Nlegendres))

  # invert g matrix using SVD decomposition
  gInv = invert_matrix_SVD(gMatrix)


  ##################################################
  #####  looping through images and analysing  #####
  ##################################################

  loadingImage = False
  curBkgAddr = ""
  loadConfig = copy.deepcopy(config)
  while (len(loadFiles) != 0) or config.doQueryFolder:
    if len(loadFiles):
      name, bkgAddr, loadConfig.centerR, loadConfig.centerC = loadFiles.pop(0)
      while name in loadedFiles:
        name, bkgAddr, loadConfig.centerR, loadConfig.centerC = loadFiles.pop(0)

      loadingImage = True
      centerConfig = loadConfig
      loadedFiles.append(name)

      # load background
      if curBkgAddr is not fld["background"]:
        curBkgAddr = fld["background"]
        if curBkgAddr is not None:
          bkgImg = get_image(fld["background"], config.hotPixel)

    elif len(queryFiles):
      name = queryFiles.pop(0)
      imgAddr = None
      loadingImage = False
      centerConfig = config
      loadedFiles.append(name)
   
      # load background
      if curBkgAddr != config.queryBkgAddr:
        curBkgAddr = config.queryBkgAddr
        if curBkgAddr is not None:
          bkgImg = get_image(config.queryBkgAddr, config.hotPixel)

    else:
      ###  save current results  ###
      if config.saveQueryResults:
        save_results(legCoeffDict, loadedFiles, averageLegCoeffArray,
            config.saveFolder, config.saveFileName)

      ###  search query folder for new files  ###
      
      queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles)
      if len(queryFiles) == 0:
        print("INFO: Query folder is empty, waiting to check again", end="")

      while len(queryFiles) == 0:
        print("...", end="")
        time.sleep(1)
        queryFiles = query_folder(config.queryFolder, config.fileExtention, loadedFiles)
      continue

    print("Now looking at %s" % name)
    ###  get image information  ###
    info = get_image_info(name)

    ###  get image and remove hot pixels  ###
    imgOrig = get_image(name, config.hotPixel)
    aggregateImage += imgOrig
    img = copy.deepcopy(imgOrig)

    ###  check total scattering intensity  ###
    imgSum = np.sum(img)
    imageSums.append(imgSum)
    if (imgSum < config.sumMin) or (imgSum > config.sumMax):
      NsumRejected += 1
      print("INFO: Rejected image %s with a total sum of %f!" % (name, imgSum))
      print("INFO: Total sum cut has rejected %i images!" % (NsumRejected))
      continue
    if (getImgNormDistribution):
      if ((len(loadFiles) == 0) and (len(queryFiles) == 0)):
        return imageSums
      continue


    ###  subtract background images  ###
    if curBkgAddr is not None:
      img -= bkgImg #background_subtraction(img, bkgImg)

    ###  center image  ###
    img, centerR, centerC = centering(img, centerConfig)
    #plt.imshow(img)
    #plt.show()

    ###  readout noise subtraction  ###
    img = readoutNoise_subtraction(img, True, 
                rLow=config.ROradLow, rHigh=config.ROradHigh)
    #plt.imshow(img)
    #plt.show()

    ###  image norm  ###
    imgNorm = 1  # get_image_norm(img, config.normRadLow, config.normRadHigh)


    #####  update loaded images  #####
    if loadingImage:
      ind = np.searchsorted(delays, [info.stageDelay])[0]
      if np.any(np.abs(delays-info.stageDelay) < 0.005):
        delayInd = delays[ind]
        loadImg,norm = loadImgDict[delayInd] 
        loadImgDict[delayInd] = (loadImg + img, norm + imgNorm)
      else:
        delays = np.insert(delays, ind, info.stageDelay)
        loadImgDict[info.stageDelay] = (img, imgNorm)

      """
      legendreCoeffs = fit_legendres(img, config.Nrebin, config.Nlegendres,
                                       config.NradialBins, gInv=gInv)
      X,Y = np.meshgrid(np.array([0,1]), Qrange)
      fitTest = plt.figure()
      axTest = figTest.add_subplot(111)
      img = axTest.pcolor(X, Y, np.reshape(legendreCoeffs[0,:],(1,-1)).T, cmap=cm.jet)
          axLoad.set_ylim([0,config.Qmax])
          axLoad.set_xlim([timeDelay[0],timeDelay[-1]])
          figLoad.colorbar(img, ax=axLoad)
          img.set_clim(-1*maxRange, maxRange)
          figLoad.canvas.draw()
          figLoad.savefig("legednre" + str(i) + "_loadedFiles.png")


      """
      if len(loadFiles) == 0:
        averageLegCoeffArray = np.zeros((config.Nlegendres,
                                         delays.shape[0],
                                         config.NradialBins), np.float64)
        for i,d in enumerate(delays):
          # fit legendres
          img,norm = loadImgDict[d]
          legendreCoeffs = fit_legendres(img, config.Nrebin, config.Nlegendres,
                                            config.NradialBins, gInv=gInv)
          if config.normByAtomic:
            legendreCoeffs *= atomicNorm 

          # record results
          legCoeffDict[d] = (legendreCoeffs, norm)
          averageLegCoeffArray[:,i,:] = legendreCoeffs/norm

        ###  save results  ###
        if config.saveLoadedResults:
          save_results(legCoeffDict, loadedFiles, averageLegCoeffArray,
            config.saveFolder, config.saveFileName)


        ###  plot results of loaded files  ###
        timeDelay = (delays - delays[0])*1e-9/(3e8*1e-12)
        if timeDelay.shape[0] > 1:
          timeDelay = np.insert(timeDelay, -1, 2*timeDelay[-1]-timeDelay[-2])
        else:
          timeDelay = np.insert(timeDelay, -1, timeDelay[-1]+0.05)
        timeDelay = timeDelay[1:]
        X,Y = np.meshgrid(timeDelay, Qrange)

        for i in [0,2]:
          figLoad = plt.figure()
          axLoad = figLoad.add_subplot(111)

          subTZleg = averageLegCoeffArray[i] - np.mean(averageLegCoeffArray[i,:4,:], axis=0)
          shp = subTZleg.shape
          mn = np.mean(subTZleg[:, int(0.2*shp[1]):int(0.7*shp[1])], axis=(0,1))
          std = np.std(subTZleg[:, int(0.2*shp[1]):int(0.7*shp[1])], axis=(0,1))
          if mn > 0:
            maxRange = np.abs(mn - 3*std)
          else:
            maxRange = mn + 3*std
          #maxRange = 0.14

          #axLoad.pcolor(X, Y, averageLegCoeffArray[i,:,:].T, cmap=cm.RdBu)
          img = axLoad.pcolor(X, Y, subTZleg[1:,:].T, cmap=cm.jet)
          axLoad.set_ylim([0,config.Qmax])
          axLoad.set_xlim([timeDelay[0],timeDelay[-1]])
          figLoad.colorbar(img, ax=axLoad)
          img.set_clim(-1*maxRange, maxRange)
          figLoad.canvas.draw()
          figLoad.savefig(config.plotDirectory 
                  + "/legendre" + str(i) + "_loadedFiles.png")


      continue


    #####  fit legendres  #####
    legendreCoeffs = fit_legendres(img, config.Nrebin, config.Nlegendres,
                                            config.NradialBins, gInv=gInv)
    if config.normByAtomic:
      legendreCoeffs *= atomicNorm 

    #####  update time domain legendres  #####
    ind = np.searchsorted(delays, [info.stageDelay])[0]
    if np.any((delays-info.stageDelay) == 0):
      delayInd = delays[ind]
      coeffs,norm = legCoeffDict[delayInd] 
      updatedCoeffs = coeffs + legendreCoeffs
      legCoeffDict[delayInd] = (updatedCoeffs, norm + imgNorm)
      averageLegCoeffArray[:,ind,:] = updatedCoeffs[:,:]/(norm + imgNorm)
    else:
      delays = np.insert(delays, ind, info.stageDelay)
      averageLegCoeffArray = np.insert(averageLegCoeffArray, ind, 
                                        legendreCoeffs[:,:], axis=1)
      legCoeffDict[info.stageDelay] = (legendreCoeffs, imgNorm)

    #####  plot time domain legendre fits  #####

    ###  diffraction patterns  ###
    axCurDP.imshow(imgOrig)
    axSumDP.imshow(aggregateImage)

    plotCurLeg.set_ydata(legendreCoeffs[0,:])
    axCurL0.set_ylim([0.9*min(legendreCoeffs[0,:]),
                      1.1*max(legendreCoeffs[0,:])])

    axTotCN.plot(np.arange(len(imageSums)), imageSums, color="k")

    ###  time dependent plots  ###
    plotInds = np.reshape(np.argwhere(delays > config.plotMinDelay*1e6), (-1))
    timeDelay = (delays[plotInds] - delays[0])/(3e8*1e-3)
    if timeDelay.shape[0] > 1:
      timeDelay = np.insert(timeDelay, -1, 2*timeDelay[-1]-timeDelay[-2])
    else:
      timeDelay = np.insert(timeDelay, -1, timeDelay[-1]+0.05)
    X,Y = np.meshgrid(timeDelay, Qrange)
    #axLegAll.pcolor(Qrange, timeDelay, averageLegCoeffArray[0,:,:], cmap=cm.RdBu)
    # aggregate legendre 0 plot
    meanSubL0 = averageLegCoeffArray[0,plotInds,:]\
                  - np.mean(averageLegCoeffArray[0,plotInds,:], axis=0)
    axAllL0.pcolor(X, Y, meanSubL0.T, cmap=cm.RdBu)
    axAllL0.set_ylim([0,config.Qmax])
    axAllL0.set_xlim([timeDelay[0],timeDelay[-1]])

    lineOut = np.sum(meanSubL0[:,LOmin:LOmax], axis=1)
    plotL0LO.set_data(timeDelay[:-1], lineOut)
    axLinL0.set_ylim([min(lineOut),max(lineOut)])
    axLinL0.set_xlim([timeDelay[0],timeDelay[-1]])
    #plotL0LO.set_xdata(timeDelay[:-1])
    #plotL0LO.set_ydata(lineOut)

    # aggregate legendre 2 plot
    meanSubL2 = averageLegCoeffArray[2,plotInds,:]\
                  - np.mean(averageLegCoeffArray[2,plotInds,:], axis=0)
    axAllL2.pcolor(X, Y, meanSubL2.T, cmap=cm.RdBu)
    axAllL2.set_ylim([0,config.Qmax])
    axAllL2.set_xlim([timeDelay[0],timeDelay[-1]])

    lineOut = np.sum(meanSubL2[:,LOmin:LOmax], axis=1)
    plotL2LO.set_data(timeDelay[:-1], lineOut)
    axLinL2.set_ylim([min(lineOut),max(lineOut)])
    axLinL2.set_xlim([timeDelay[0],timeDelay[-1]])
    #plotL2LO.set_xdata(timeDelay[:-1])
    #plotL2LO.set_ydata(lineOut)

    #plt.autoscale(tight=True)
    plt.tight_layout()

    fig.canvas.draw()



  ################################
  #####  plot final results  #####
  ################################

  finalFig = plt.figure()
  ax = finalFig.add_subplot(111)

  timeDelay = (delays - delays[0])*1e-2/(3e8*1e-12)
  if timeDelay.shape[0] > 1:
    timeDelay = np.insert(timeDelay, -1, 2*timeDelay[-1]-timeDelay[-2])
  else:
    timeDelay = np.insert(timeDelay, -1, timeDelay[-1]+0.05)
  X,Y = np.meshgrid(timeDelay, Qrange)

  for i in range(config.Nlegendres):
    ax.pcolor(X, Y, averageLegCoeffArray[i,:,:].T, cmap=cm.RdBu)
    finalFig.savefig(config.plotDirectory + "/" + 
          config.plotPrefix + "Legendre" + str(i) + ".png")
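
The time-domain bookkeeping in the loop above reduces to keeping the delays array sorted: np.searchsorted gives the insertion point, and either the existing entry is updated or np.insert grows the axis at that point. A minimal sketch of that pattern with made-up delays:

import numpy as np

delays = np.array([10.0, 20.0, 40.0])

new_delay = 30.0
ind = np.searchsorted(delays, [new_delay])[0]   # 2
if np.any(delays - new_delay == 0):
    print("delay already present at index", ind)
else:
    delays = np.insert(delays, ind, new_delay)
print(delays)  # [10. 20. 30. 40.]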
Example #57
0
    def write_vid_results_multiprocess(self, detection, gpu_id):
        """
        write results files in pascal devkit path
        :param all_boxes: boxes to be processed [bbox, confidence]
        :return: None
        """

        print 'Writing {} ImageNetVID results file'.format('all')
        filename = self.get_result_file_template(gpu_id).format('all')
        frame_seg_len = self.frame_seg_len
        nms = py_nms_wrapper(0.3)
        data_time = 0
        all_boxes = detection[0]
        frame_ids = detection[1]
        start_idx = 0
        sum_frame_ids = np.cumsum(frame_seg_len)
        first_true_id = frame_ids[0]
        start_video = np.searchsorted(sum_frame_ids, first_true_id)

        for im_ind in range(1, len(frame_ids)):
            t = time.time()
            true_id = frame_ids[im_ind]
            video_index = np.searchsorted(sum_frame_ids, true_id)
            if (video_index != start_video):  # represents a new video
                t1 = time.time()
                video = [
                    all_boxes[j][start_idx:im_ind]
                    for j in range(1, self.num_classes)
                ]
                dets_all = seq_nms(video)
                for j in xrange(1, self.num_classes):
                    for frame_ind, dets in enumerate(dets_all[j - 1]):
                        keep = nms(dets)
                        all_boxes[j][frame_ind + start_idx] = dets[keep, :]
                start_idx = im_ind
                start_video = video_index
                t2 = time.time()
                print 'video_index=', video_index, '  time=', t2 - t1
            data_time += time.time() - t
            if (im_ind % 100 == 0):
                print '{} seq_nms testing {} data {:.4f}s'.format(
                    frame_ids[im_ind - 1], im_ind, data_time / im_ind)

        # the last video
        video = [
            all_boxes[j][start_idx:im_ind] for j in range(1, self.num_classes)
        ]
        dets_all = seq_nms(video)
        for j in xrange(1, self.num_classes):
            for frame_ind, dets in enumerate(dets_all[j - 1]):
                keep = nms(dets)
                all_boxes[j][frame_ind + start_idx] = dets[keep, :]

        with open(filename, 'wt') as f:
            for im_ind in range(len(frame_ids)):
                for cls_ind, cls in enumerate(self.classes):
                    if cls == '__background__':
                        continue
                    dets = all_boxes[cls_ind][im_ind]
                    if len(dets) == 0:
                        continue
                    # the imagenet expects 0-based indices
                    for k in range(dets.shape[0]):
                        f.write(
                            '{:d} {:d} {:.4f} {:.2f} {:.2f} {:.2f} {:.2f}\n'.
                            format(frame_ids[im_ind], cls_ind, dets[k, -1],
                                   dets[k, 0], dets[k, 1], dets[k, 2],
                                   dets[k, 3]))
        return
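
Mapping a global frame id to its video is done above with a cumulative sum of per-video frame counts plus np.searchsorted. A small sketch with made-up segment lengths, assuming 1-based global frame ids (which is what the default side='left' lookup implies):

import numpy as np

frame_seg_len = np.array([100, 250, 80])
sum_frame_ids = np.cumsum(frame_seg_len)   # [100, 350, 430]

frame_ids = np.array([1, 100, 101, 350, 351, 430])
video_index = np.searchsorted(sum_frame_ids, frame_ids)
print(video_index)  # [0 0 1 1 2 2]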
Example #58
0
 def del_data_at(self, ind):
     idx = np.searchsorted(self.inds, ind)
     self.inds = np.delete(self.inds, idx)
     self.data = np.delete(self.data, idx, axis=0)
     self._update_centroid()
Example #59
0
def find_slice(
    data: np.ndarray, positions: np.ndarray, s: slice
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Get slice of RLE data.

    Parameters
    ----------
    data:
        Data at start of reach run.
    positions:
        End positions of runs.

    Returns
    -------
    data:
        Data at start of reach run.
    positions:
        End positions of runs.
    """
    length = get_len(positions)

    start = 0
    stop = length
    step = None
    invert = False

    if (s.step is not None) and (s.step != 1):
        if s.step < 0:
            invert = True
            step = -s.step
        else:
            step = s.step

    if s.start is not None:
        if s.start < 0:
            start = max(start, length + s.start)
        else:
            start = max(start, s.start)
    if s.stop is not None:
        if s.stop < 0:
            stop = max(0, min(stop, length + s.stop))
        else:
            stop = min(stop, s.stop)

    if start == 0:
        idx_start = 0
    elif start >= length:
        idx_start = len(positions)
    else:
        idx_start = np.searchsorted(positions, start, side="right")
    if stop == 0:
        idx_stop = 0
    elif stop >= length:
        idx_stop = len(positions)
    else:
        idx_stop = np.searchsorted(positions, stop, side="left") + 1

    data = data[idx_start:idx_stop]
    positions = positions[idx_start:idx_stop] - start
    if len(positions) > 0:
        positions[-1] = stop - start

    if invert:
        lengths = calc_lengths(positions)
        lengths = lengths[::-1]
        positions = np.cumsum(lengths)
        data = data[::-1]

    if step:
        positions = ((positions - 1) // step) + 1

        mask = np.empty(len(positions), dtype=bool)
        if len(positions) > 0:
            mask[0] = True
        mask[1:] = positions[1:] != positions[:-1]

        data = data[mask]
        positions = positions[mask]

        data, positions = recompress(data, positions)

    return (data, positions)
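
The index arithmetic at the heart of find_slice can be shown on its own: side="right" keeps the run still in progress at start, and side="left" plus one keeps the run containing stop. A sketch with a hypothetical run-length encoding (get_len, calc_lengths and recompress are helpers from the source and are not needed here):

import numpy as np

# runs: values and their exclusive end positions, i.e. the decoded sequence
# is a a a b b c c c c (length 9)
data      = np.array(["a", "b", "c"])
positions = np.array([3, 5, 9])

start, stop = 2, 7   # want decoded elements 2..6
idx_start = np.searchsorted(positions, start, side="right")    # first run ending after start -> 0
idx_stop  = np.searchsorted(positions, stop, side="left") + 1  # run containing stop, inclusive -> 3

print(data[idx_start:idx_stop])               # ['a' 'b' 'c']
print(positions[idx_start:idx_stop] - start)  # [1 3 7]; find_slice then overwrites the last entry with stop - start = 5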
Example #60
0
 def get_data_at(self, ind):
     return self.data[np.searchsorted(self.inds, ind)]
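
A minimal stand-in for the pattern shared by set_data_at, del_data_at and get_data_at above: because inds is kept sorted, np.searchsorted maps a frame index straight to its row in data. Note this assumes ind is actually present in inds; for a missing index, searchsorted returns the insertion point and the lookup would silently hit a neighbouring row.

import numpy as np

inds = np.array([3, 7, 12])
data = np.array([[0.1, 0.2],
                 [0.3, 0.4],
                 [0.5, 0.6]])

pos = np.searchsorted(inds, 7)   # 1
print(data[pos])                 # [0.3 0.4]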