Example #1
 def test_result_values(self):
     tgt = [np.percentile(d, 28) for d in _rdat]
     res = np.nanpercentile(_ndat, 28, axis=1)
     assert_almost_equal(res, tgt)
     tgt = [np.percentile(d, (28,98)) for d in _rdat]
     res = np.nanpercentile(_ndat, (28,98), axis=1)
     assert_almost_equal(res, tgt)
Example #2
    def _auto_limits(self):

        if self.component_data is None:
            return

        exclude = (100 - self.percentile) / 2.

        # For subsets in 'data' mode, we want to compute the limits based on
        # the full dataset, not just the subset.
        if isinstance(self.data, Subset):
            data_values = self.data.data[self.component_id]
        else:
            data_values = self.data[self.component_id]

        try:
            lower = np.nanpercentile(data_values, exclude)
            upper = np.nanpercentile(data_values, 100 - exclude)
        except AttributeError:  # Numpy < 1.9
            data_values = data_values[~np.isnan(data_values)]
            lower = np.percentile(data_values, exclude)
            upper = np.percentile(data_values, 100 - exclude)

        if isinstance(self.data, Subset):
            lower = 0

        self.set_limits(lower, upper)
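The pattern above boils down to computing symmetric percentile limits around a chosen percentile window; a standalone sketch of just that computation, with illustrative data and none of the glue-specific classes:

import numpy as np

percentile = 95
exclude = (100 - percentile) / 2.

# hypothetical data: 1000 normal samples plus a missing value
data_values = np.append(np.random.randn(1000), np.nan)

lower = np.nanpercentile(data_values, exclude)
upper = np.nanpercentile(data_values, 100 - exclude)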
Example #3
def shift_mask_data(X, Y, upper_percentile=70, lower_percentile=30, n_fwd_days=1):
    # Shift X to match factors at t to returns at t+n_fwd_days (we want to predict future returns after all)
    shifted_X = np.roll(X, n_fwd_days+1, axis=0)
    
    # Slice off rolled elements
    X = shifted_X[n_fwd_days+1:]
    Y = Y[n_fwd_days+1:]
    
    n_time, n_stocks, n_factors = X.shape
    
    # Look for biggest up and down movers
    upper = np.nanpercentile(Y, upper_percentile, axis=1)[:, np.newaxis]
    lower = np.nanpercentile(Y, lower_percentile, axis=1)[:, np.newaxis]
  
    upper_mask = (Y >= upper)
    lower_mask = (Y <= lower)
    
    mask = upper_mask | lower_mask # This also drops nans
    mask = mask.flatten()
    
    # Only try to predict whether a stock moved up/down relative to other stocks
    Y_binary = np.zeros(n_time * n_stocks)
    Y_binary[upper_mask.flatten()] = 1
    Y_binary[lower_mask.flatten()] = -1
    
    # Flatten X
    X = X.reshape((n_time * n_stocks, n_factors))

    # Drop stocks that did not move much (i.e. are in the 30th to 70th percentile)
    X = X[mask]
    Y_binary = Y_binary[mask]
    
    return X, Y_binary
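A minimal usage sketch for the function above; the shapes and random values are assumptions chosen for illustration:

import numpy as np

n_time, n_stocks, n_factors = 100, 50, 3
X = np.random.randn(n_time, n_stocks, n_factors)   # factor exposures
Y = np.random.randn(n_time, n_stocks) * 0.02       # returns, with some gaps
Y[::7, ::5] = np.nan

X_flat, Y_binary = shift_mask_data(X, Y, upper_percentile=70,
                                   lower_percentile=30, n_fwd_days=1)
print(X_flat.shape, Y_binary.shape)   # only the biggest up/down movers remain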
Example #4
def qmap_mean_departure(x, sample1, sample2, meinequantilen, sample_size,
                        return_mean=False, linear=True):
    from support_functions import qstats

    s1d = x[sample1]  # truth (sample1)
    s2d = x[sample2]  # biased (sample2)

    # add 0 and 100
    meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]]))

    qb = np.nanpercentile(s1d, meinequantilen)  # truth
    qa = np.nanpercentile(s2d, meinequantilen)  # biased
    mean1 = np.copy(qb)
    mean2 = np.copy(qa)

    # Mean of quantile boxes( not 0 and 100 )
    count1, m1 = qstats(s1d, meinequantilen[1:-1], counts=sample_size)
    count2, m2 = qstats(s2d, meinequantilen[1:-1], counts=sample_size)
    # only missing ?
    mean1[:-1] = m1
    mean2[:-1] = m2
    # interpolation of bin-means
    if linear:
        m1d = np.interp(s2d, qb[1:], mean1[:-1])  # interpolate the bin means onto the data
        m2d = np.interp(s2d, qa[1:], mean2[:-1])
    else:
        tck = interpolate.splrep(qb[1:], mean1[:-1], s=0)
        m1d = interpolate.splev(s2d, tck, der=0)
        tck = interpolate.splrep(qa[1:], mean2[:-1], s=0)
        m2d = interpolate.splev(s2d, tck, der=0)
    # difference
    if return_mean:
        return m1, m2

    return m1d - m2d   # one value
Example #5
def simpleStats(y, axis=None):
    """ Computes simple statistics

    Computes the mean, median, min, max, standard deviation, and interquartile
    range of a numpy array y.

    Args:
        y (array): A Numpy array
        axis (int, typle of ints): Optional. Axis or Axes along which the means
            are computed, the default is to compute the mean of the flattened
            array. If a tuple of ints, performed over multiple axes

    Returns:
        The mean, median, min, max, standard deviation and IQR by columns

    """
    # make sure that y is an array
    y = np.array(y, dtype='float64')

    # Perform the various calculations
    mean = np.nanmean(y, axis=axis)
    std = np.nanstd(y, axis=axis)
    median = np.nanmedian(y, axis=axis)
    min_ = np.nanmin(y, axis=axis)
    max_ = np.nanmax(y, axis=axis)
    IQR = np.nanpercentile(y, 75, axis=axis) - np.nanpercentile(y, 25, axis=axis)

    return mean, median, min_, max_, std, IQR
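A brief usage sketch for simpleStats (illustrative values; the NaN is ignored by the nan-aware reductions):

import numpy as np

y = [[1.0, 2.0, np.nan],
     [4.0, 5.0, 6.0]]
mean, median, min_, max_, std, IQR = simpleStats(y, axis=0)
print(IQR)   # column-wise interquartile range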
Example #6
    def update_values(self, use_default_modifiers=False, **properties):

        if not any(prop in properties for prop in ('attribute', 'percentile', 'log')):
            self.set(percentile='Custom')
            return

        if use_default_modifiers:
            percentile = 100
            log = False
        else:
            percentile = self.percentile or 100
            log = self.log or False

        if percentile == 'Custom' or self.data is None:

            self.set(percentile=percentile, log=log)

        else:

            exclude = (100 - percentile) / 2.

            data_values = self.data_values

            try:
                lower = np.nanpercentile(data_values, exclude)
                upper = np.nanpercentile(data_values, 100 - exclude)
            except AttributeError:  # Numpy < 1.9
                data_values = data_values[~np.isnan(data_values)]
                lower = np.percentile(data_values, exclude)
                upper = np.percentile(data_values, 100 - exclude)

            self.set(lower=lower, upper=upper, percentile=percentile, log=log)
Example #7
    def test_multiple_percentiles(self):
        perc = [50, 100]
        mat = np.ones((4, 3))
        nan_mat = np.nan * mat
        # For checking consistency in higher dimensional case
        large_mat = np.ones((3, 4, 5))
        large_mat[:, 0:2:4, :] = 0
        large_mat[:, :, 3:] *= 2
        for axis in [None, 0, 1]:
            for keepdim in [False, True]:
                with warnings.catch_warnings(record=True) as w:
                    warnings.simplefilter('always')
                    val = np.percentile(mat, perc, axis=axis, keepdims=keepdim)
                    nan_val = np.nanpercentile(nan_mat, perc, axis=axis,
                                               keepdims=keepdim)
                    assert_equal(nan_val.shape, val.shape)

                    val = np.percentile(large_mat, perc, axis=axis,
                                        keepdims=keepdim)
                    nan_val = np.nanpercentile(large_mat, perc, axis=axis,
                                               keepdims=keepdim)
                    assert_equal(nan_val, val)

        megamat = np.ones((3, 4, 5, 6))
        assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6))
Example #8
def print_stats(array):
    print()
    print("5th percentile of data is: " + '\t\t\t' + str(round(np.nanpercentile(array, 5), 2)) + "um")
    print("95th percentile of data is: " + '\t\t\t' + str(round(np.nanpercentile(array, 95), 2)) + "um")
    print("Peak-to-peak amplitude of structure is: " + '\t' + str(round(np.nanpercentile(array, 95) - np.nanpercentile(array, 5), 2)) + "um")
    print("Half peak-to-peak amplitude of structure is: " + '\t' + str(round((np.nanpercentile(array, 95) - np.nanpercentile(array, 5)) / 2, 2)) + "um")
    print()
Example #9
	def display(self,keys= None,live = True, scale = False):
		"""
		plot the training data
		"""
		if keys == None:
			keys = self.headers
		plt.clf()
		fig, axes = plt.subplots(1, len(keys), figsize=(len(keys) * 5,5), squeeze=False)  
		counter = 0
		for fig_j,c in enumerate(self.categories):
			for fig_i, h in enumerate(keys):
				ax1 = axes[0,fig_i]
				ax1.plot(self.ys[c][h].x,self.ys[c][h].y,colors[fig_j])
				if scale:
					m = np.nanpercentile(self.ys[c][h].y , 25, interpolation="higher")
					M = np.nanpercentile(self.ys[c][h].y , 75, interpolation="higher")
					ax1.set_ylim([0 , 1.5 * M])
				val = self.ys[c][h].y[-1]
				#ax1.set_title(h + ": " +  str(val))
				if counter == 0:
					ax1.set_title("{0} : {1:.3f}".format(h,val))
				#ax1.annotate(self.ys[h][-1],xy=(   , np.mean(self.ys[h]) ) )
			counter += 1
		fig.tight_layout()
		if live:
			display.clear_output(wait=True)
			display.display(plt.gcf())
			plt.close()
		else:
			plt.plot()
			plt.show()
Example #10
def Tukey_outliers(set_of_means, FDR=0.005, supporting_interval=0.5, verbose=False):
    """
    Performs a Tukey quantile (fence) test for outliers from a normal distribution with a defined false discovery rate
    :param set_of_means: values to screen for outliers
    :param FDR: false discovery rate
    :return: indices above the high fence, indices below the low fence
    """
    # false discovery rate v.s. expected falses v.s. power
    q1_q3 = norm.interval(supporting_interval)
    # TODO: this is not necessary: we can perfectly well fit it with proper params to FDR
    FDR_q1_q3 = norm.interval(1 - FDR)
    multiplier = (FDR_q1_q3[1] - q1_q3[1]) / (q1_q3[1] - q1_q3[0])
    l_means = len(set_of_means)

    q1 = np.nanpercentile(set_of_means, 50*(1-supporting_interval))
    q3 = np.nanpercentile(set_of_means, 50*(1+supporting_interval))
    high_fence = q3 + multiplier*(q3 - q1)
    low_fence = q1 - multiplier*(q3 - q1)

    if verbose:
        print('FDR:', FDR)
        print('q1_q3', q1_q3)
        print('FDRq1_q3', FDR_q1_q3)
        print('q1, q3', q1, q3)
        print('fences', high_fence, low_fence)

    if verbose:
        print("FDR: %s %%, expected outliers: %s, outlier 5%% confidence interval: %s" %
              (FDR * 100, FDR * l_means, poisson.interval(0.95, FDR * l_means)))

    ho = (set_of_means < low_fence).nonzero()[0]
    lo = (set_of_means > high_fence).nonzero()[0]

    return lo, ho
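A short usage sketch; the data are synthetic, and the scipy.stats imports that the function above relies on are included here as an assumption:

import numpy as np
from scipy.stats import norm, poisson   # required by Tukey_outliers above

means = np.concatenate([np.random.normal(0, 1, 500), [8.0, -9.0]])
high_idx, low_idx = Tukey_outliers(means, FDR=0.005)
print(high_idx, low_idx)   # indices above the high fence / below the low fence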
Example #11
    def _auto_limits(self):

        if self.data is None:
            return

        if self.attribute is None:
            return

        if self.subset_mode == 'outline':
            self.set_limits(0, 1)
            return

        exclude = (100 - self.percentile) / 2.

        # For subsets in 'data' mode, we want to compute the limits based on
        # the full dataset, not just the subset.
        if self.subset_mode == 'data':
            data_values = self.data.data[self.attribute]
        else:
            data_values = self.data[self.attribute]

        try:
            lower = np.nanpercentile(data_values, exclude)
            upper = np.nanpercentile(data_values, 100 - exclude)
        except AttributeError:  # Numpy < 1.9
            data_values = data_values[~np.isnan(data_values)]
            lower = np.percentile(data_values, exclude)
            upper = np.percentile(data_values, 100 - exclude)

        if self.subset_mode == 'data':
            self.set_limits(0, upper)
        else:
            self.set_limits(lower, upper)
Example #12
def _rescale_imshow_rgb(darray, vmin, vmax, robust):
    assert robust or vmin is not None or vmax is not None
    # There's a cyclic dependency via DataArray, so we can't import from
    # xarray.ufuncs in global scope.
    from xarray.ufuncs import maximum, minimum
    # Calculate vmin and vmax automatically for `robust=True`
    if robust:
        if vmax is None:
            vmax = np.nanpercentile(darray, 100 - ROBUST_PERCENTILE)
        if vmin is None:
            vmin = np.nanpercentile(darray, ROBUST_PERCENTILE)
    # If not robust and one bound is None, calculate the default other bound
    # and check that an interval between them exists.
    elif vmax is None:
        vmax = 255 if np.issubdtype(darray.dtype, np.integer) else 1
        if vmax < vmin:
            raise ValueError(
                'vmin=%r is less than the default vmax (%r) - you must supply '
                'a vmax > vmin in this case.' % (vmin, vmax))
    elif vmin is None:
        vmin = 0
        if vmin > vmax:
            raise ValueError(
                'vmax=%r is less than the default vmin (0) - you must supply '
                'a vmin < vmax in this case.' % vmax)
    # Scale interval [vmin .. vmax] to [0 .. 1], with darray as 64-bit float
    # to avoid precision loss, integer over/underflow, etc with extreme inputs.
    # After scaling, downcast to 32-bit float.  This substantially reduces
    # memory usage after we hand `darray` off to matplotlib.
    darray = ((darray.astype('f8') - vmin) / (vmax - vmin)).astype('f4')
    return minimum(maximum(darray, 0), 1)
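The robust branch above simply clips to a central percentile window before scaling to [0, 1]; a plain-NumPy sketch of the same idea (the ROBUST_PERCENTILE value of 2 is an assumption for illustration, and np.clip stands in for the xarray minimum/maximum ufuncs):

import numpy as np

ROBUST_PERCENTILE = 2
darray = np.random.rand(64, 64, 3) * 300 - 20   # synthetic image-like data
vmin = np.nanpercentile(darray, ROBUST_PERCENTILE)
vmax = np.nanpercentile(darray, 100 - ROBUST_PERCENTILE)
scaled = np.clip((darray.astype('f8') - vmin) / (vmax - vmin), 0, 1).astype('f4')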
Example #13
def timeseries(iData, zoneMap, std=None):
    '''
    Make zone-wise averaging of input data
    input: 3D matrix(Layers x Width x Height) and map of zones (W x H)
    output: 2D matrices(L x WH) with mean and std
    '''
    #reshape input cube into 2D matrix
    r, h, w = iData.shape
    iData, notNanDataI = cube2flat(iData)
    #get unique values of not-nan labels
    uniqZones = np.unique(zoneMap[np.isfinite(zoneMap)])
    zoneNum = np.zeros((r, uniqZones.size))
    zoneMean = np.zeros((r, uniqZones.size))
    zoneStd = np.zeros((r, uniqZones.size))
    zoneP16 = np.zeros((r, uniqZones.size))
    zoneP84 = np.zeros((r, uniqZones.size))

    #in each zone: get all values from input data get not nan data average
    for i in range(uniqZones.size):
        zi = uniqZones[i]
        if not np.isnan(zi):
            zoneData = iData[:, zoneMap.flat == zi]
            zoneNum[:, i] = zi
            if std is not None:
                # filter out values more than `std` standard deviations from the mean;
                # a first-pass mean/std is needed here, since zoneMean/zoneStd are still zero
                zoneMean[:, i] = np.nanmean(zoneData, axis=1)
                zoneStd[:, i] = np.nanstd(zoneData, axis=1)
                outliers = (np.abs(zoneData.T - zoneMean[:, i]) > zoneStd[:, i] * std).T
                zoneData[outliers] = np.nan

            zoneMean[:, i] = np.nanmean(zoneData, axis=1)
            zoneStd[:, i] = np.nanstd(zoneData, axis=1)
            zoneP16[:, i] = np.nanpercentile(zoneData, 16, axis=1)
            zoneP84[:, i] = np.nanpercentile(zoneData, 84, axis=1)

    return zoneMean, zoneStd, zoneNum, zoneP16, zoneP84
Example #14
    def test_multiple_percentiles(self):
        perc = [50, 100]
        mat = np.ones((4, 3))
        nan_mat = np.nan * mat
        # For checking consistency in higher dimensional case
        large_mat = np.ones((3, 4, 5))
        large_mat[:, 0:2:4, :] = 0
        large_mat[:, :, 3:] *= 2
        for axis in [None, 0, 1]:
            for keepdim in [False, True]:
                with suppress_warnings() as sup:
                    sup.filter(RuntimeWarning, "All-NaN slice encountered")
                    val = np.percentile(mat, perc, axis=axis, keepdims=keepdim)
                    nan_val = np.nanpercentile(nan_mat, perc, axis=axis,
                                               keepdims=keepdim)
                    assert_equal(nan_val.shape, val.shape)

                    val = np.percentile(large_mat, perc, axis=axis,
                                        keepdims=keepdim)
                    nan_val = np.nanpercentile(large_mat, perc, axis=axis,
                                               keepdims=keepdim)
                    assert_equal(nan_val, val)

        megamat = np.ones((3, 4, 5, 6))
        assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6))
Example #15
def truncate_range(data, percMin=0.25, percMax=99.75, discard_zeros=True):
    """Truncate too low and too high values.

    Parameters
    ----------
    data : np.ndarray
        Image to be truncated.
    percMin : float
        Percentile minimum.
    percMax : float
        Percentile maximum.
    discard_zeros : bool
        Discard voxels with value 0 from truncation.

    Returns
    -------
    data : np.ndarray
        Truncated data.
    pMin : float
        Minimum truncation threshold which is used.
    pMax : float
        Maximum truncation threshold which is used.

    """
    if discard_zeros:
        msk = ~np.isclose(data, 0.)
        pMin, pMax = np.nanpercentile(data[msk], [percMin, percMax])
    else:
        pMin, pMax = np.nanpercentile(data, [percMin, percMax])
    temp = data[~np.isnan(data)]
    temp[temp < pMin], temp[temp > pMax] = pMin, pMax  # truncate min and max
    data[~np.isnan(data)] = temp
    if discard_zeros:
        data[~msk] = 0  # put back masked out voxels
    return data, pMin, pMax
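A quick usage sketch for truncate_range; the synthetic image and the outlier values are purely illustrative (note the function modifies `data` in place):

import numpy as np

data = np.random.normal(100.0, 20.0, size=(64, 64))
data[0, 0], data[1, 1] = -1e6, 1e6    # extreme outliers to be clipped
data[2, 2] = 0.0                      # a zero voxel, preserved by discard_zeros
trunc, pMin, pMax = truncate_range(data, percMin=0.25, percMax=99.75)
print(pMin, pMax)                     # thresholds actually applied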
Example #16
    def _compute(self, arrays, dates, assets, mask):
        """
        For each row in the input, compute a mask of all values falling between
        the given percentiles.
        """
        # TODO: Review whether there's a better way of handling small numbers
        # of columns.
        data = arrays[0].copy().astype(float64)
        data[~mask] = nan

        # FIXME: np.nanpercentile **should** support computing multiple bounds
        # at once, but there's a bug in the logic for multiple bounds in numpy
        # 1.9.2.  It will be fixed in 1.10.
        # c.f. https://github.com/numpy/numpy/pull/5981
        lower_bounds = nanpercentile(
            data,
            self._min_percentile,
            axis=1,
            keepdims=True,
        )
        upper_bounds = nanpercentile(
            data,
            self._max_percentile,
            axis=1,
            keepdims=True,
        )
        return (lower_bounds <= data) & (data <= upper_bounds)
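As the FIXME notes, once the referenced bug is fixed (NumPy 1.10 and later, per the comment) both bounds can be obtained from a single nanpercentile call by passing a sequence of percentiles; a minimal sketch of that variant with illustrative data:

import numpy as np

data = np.array([[1., 2., np.nan, 4., 5.],
                 [5., np.nan, 3., 2., 1.]])
lower_bounds, upper_bounds = np.nanpercentile(data, [25, 75], axis=1, keepdims=True)
in_range = (lower_bounds <= data) & (data <= upper_bounds)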
Example #17
    def test_percentile_nasty_partitions(self):
        # Test percentile with nasty partitions: divide up 5 assets into
        # quartiles.
        # There isn't a nice mathematical definition of correct behavior here,
        # so for now we guarantee the behavior of numpy.nanpercentile.  This is
        # mostly for regression testing in case we write our own specialized
        # percentile calculation at some point in the future.

        data = arange(25, dtype=float).reshape(5, 5) % 4
        quartiles = range(4)
        filter_names = ['pct_' + str(q) for q in quartiles]

        graph = TermGraph(
            {
                name: self.f.percentile_between(q * 25.0, (q + 1) * 25.0)
                for name, q in zip(filter_names, quartiles)
            }
        )
        results = self.run_graph(
            graph,
            initial_workspace={self.f: data},
            mask=self.build_mask(ones((5, 5))),
        )

        for name, quartile in zip(filter_names, quartiles):
            result = results[name]
            lower = quartile * 25.0
            upper = (quartile + 1) * 25.0
            expected = and_(
                nanpercentile(data, lower, axis=1, keepdims=True) <= data,
                data <= nanpercentile(data, upper, axis=1, keepdims=True),
            )
            check_arrays(result, expected)
Example #18
 def test_result_values(self):
     tgt = [np.percentile(d, 28) for d in _rdat]
     res = np.nanpercentile(_ndat, 28, axis=1)
     assert_almost_equal(res, tgt)
     # Transpose the array to fit the output convention of numpy.percentile
     tgt = np.transpose([np.percentile(d, (28, 98)) for d in _rdat])
     res = np.nanpercentile(_ndat, (28, 98), axis=1)
     assert_almost_equal(res, tgt)
Example #19
    def doCalc(self):
        self.median = float(np.nanmedian(self.list_values))
        self.average = float(np.nanmean(self.list_values))
        self.mode = float(stats.mode(self.list_values, nan_policy='omit')[0])
        #self.average = self.sum / self.len

        self.CI['min'] = float(np.nanpercentile(self.list_values, 5))
        self.CI['max'] = float(np.nanpercentile(self.list_values, 95))
Example #20
def plot_quantile_returns_violin(return_by_q,
                                 ylim_percentiles=None,
                                 ax=None):
    """
    Plots a violin box plot of period wise returns for factor quantiles.

    Parameters
    ----------
    return_by_q : pd.DataFrame - MultiIndex
        DataFrame with date and quantile as rows MultiIndex,
        forward return windows as columns, returns as values.
    ylim_percentiles : tuple of integers
        Percentiles of observed data to use as y limits for plot.
    ax : matplotlib.Axes, optional
        Axes upon which to plot.

    Returns
    -------
    ax : matplotlib.Axes
        The axes that were plotted on.
    """

    return_by_q = return_by_q.copy()
        
    if ylim_percentiles is not None:
        ymin = (np.nanpercentile(return_by_q.values,
                              ylim_percentiles[0]) * DECIMAL_TO_BPS)
        ymax = (np.nanpercentile(return_by_q.values,
                              ylim_percentiles[1]) * DECIMAL_TO_BPS)
    else:
        ymin = None
        ymax = None

    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(18, 6))

    unstacked_dr = (return_by_q
                    .multiply(DECIMAL_TO_BPS))
    unstacked_dr.columns = unstacked_dr.columns.set_names('forward_periods')
    unstacked_dr = unstacked_dr.stack()
    unstacked_dr.name = 'return'
    unstacked_dr = unstacked_dr.reset_index()

    sns.violinplot(data=unstacked_dr,
                   x='factor_quantile',
                   hue='forward_periods',
                   y='return',
                   orient='v',
                   cut=0,
                   inner='quartile',
                   ax=ax)
    ax.set(xlabel='', ylabel='Return (bps)',
           title="Period Wise Return By Factor Quantile",
           ylim=(ymin, ymax))

    ax.axhline(0.0, linestyle='-', color='black', lw=0.7, alpha=0.6)

    return ax
Example #21
def qmap_departure(x, sample1, sample2, meinequantilen, sample_size, sample3=None, return_mean=False, linear=True,
                   verbose=0):
    from support_functions import qstats
    #
    s1d = x[sample1]  # truth (sample1)
    s2d = x[sample2]  # biased (sample2)
    #
    # add 0 and 100
    meinequantilen = np.unique(np.concatenate([[0], meinequantilen, [100]]))
    # Be sure to remove 0,100 now
    # Mean of quantile boxes( not 0 and 100 )
    count1, m1 = qstats(s1d, meinequantilen[1:-1], counts=sample_size)
    count2, m2 = qstats(s2d, meinequantilen[1:-1], counts=sample_size)
    ok1 = count1[:-1] > sample_size
    ok2 = count2[:-1] > sample_size
    # Enough data to calculate ?
    if not np.any(ok1 & ok2):
        if sample3 is not None:
            return np.zeros(x[sample3].shape)  # return only zeros
        else:
            return np.zeros(s2d.shape)
    #
    if verbose > 1:
        print("Quantiles:", meinequantilen)
        print("Sample 1: ", count1)
        print("Sample 2: ", count2)
    # 
    qb = np.nanpercentile(s1d, meinequantilen)  # truth
    qa = np.nanpercentile(s2d, meinequantilen)  # biased
    #
    diffs = qb - qa  # difference of quantiles (1st and last kept for the interpolation)
    xp = qa
    xp[:-1] = m2  # x points of the interpolation ( ? NaN )
    diffs[:-1] = m1 - m2  # y points of the interpolation
    if return_mean:
        return m1, m2
    # interpolate quantile differences
    # how to handle end-point ?
    # if not extrapolate:
    #     diffs = diffs[1:-1] # trim
    #     xp = xp[1:-1]       # trim
    # Spline or linear interpolation
    if not linear:
        tck = interpolate.splrep(xp, diffs, s=0)
        if sample3 is not None:
            out = interpolate.splev(x[sample3], tck, der=0)  # does this retain nan ?
        else:
            out = interpolate.splev(s2d, tck, der=0)
    #
    else:
        # to all data in sample / but not when missing!
        if sample3 is not None:
            out = np.interp(x[sample3], xp, diffs)
        else:
            out = np.interp(s2d, xp, diffs)

    # turn missing into zero
    return np.where(np.isfinite(out), out, 0.)  # size of sample 2 or sample 3 # no adjustment
Example #22
    def apply(self, predictions, dimension=0):
        """Peak detection

        Parameter
        ---------
        predictions : SlidingWindowFeature
            Predictions returned by segmentation approaches.

        Returns
        -------
        segmentation : Timeline
            Partition.
        """

        if len(predictions.data.shape) == 1:
            y = predictions.data
        elif predictions.data.shape[1] == 1:
            y = predictions.data[:, 0]
        else:
            y = predictions.data[:, dimension]

        if self.log_scale:
            y = np.exp(y)

        sw = predictions.sliding_window

        precision = sw.step
        order = max(1, int(np.rint(self.min_duration / precision)))
        indices = scipy.signal.argrelmax(y, order=order)[0]

        if self.scale == 'absolute':
            mini = 0
            maxi = 1

        elif self.scale == 'relative':
            mini = np.nanmin(y)
            maxi = np.nanmax(y)

        elif self.scale == 'percentile':
            mini = np.nanpercentile(y, 1)
            maxi = np.nanpercentile(y, 99)

        threshold = mini + self.alpha * (maxi - mini)

        peak_time = np.array([sw[i].middle for i in indices if y[i] > threshold])

        n_windows = len(y)
        start_time = sw[0].start
        end_time = sw[n_windows].end

        boundaries = np.hstack([[start_time], peak_time, [end_time]])
        segmentation = Timeline()
        for i, (start, end) in enumerate(pairwise(boundaries)):
            segment = Segment(start, end)
            segmentation.add(segment)

        return segmentation
Example #23
def setCVflagByGroup(args, wide, dat):

    # Split design file by treatment group

    pdfOut = PdfPages(args.CVplot)
    CV = pd.DataFrame(index=wide.index)
    for title, group in dat.design.groupby(args.group):

        # Filter the wide file into a new dataframe
        currentFrame = wide[group.index]

        # Change dat.sampleIDs to match the design file
        dat.sampleIDs = group.index

        CV['cv_'+title], CVcutoff = setCVflag(args, currentFrame, dat, groupName=title)

    CV['cv'] = CV.apply(np.max, axis=1)
    if not args.CVcutoff:
        CVcutoff = np.nanpercentile(CV['cv'].values, q=90)
        CVcutoff = round(CVcutoff, -int(floor(log(abs(CVcutoff), 10))) + 2)
    else:
        CVcutoff = float(args.CVcutoff)
    for title, group in dat.design.groupby(args.group):
        fig, ax = plt.subplots()
        xmin = -np.nanpercentile(CV['cv_'+title].values,99)*0.2
        xmax = np.nanpercentile(CV['cv_'+title].values,99)*1.5
        ax.set_xlim(xmin, xmax)
        CV['cv_'+title].plot(kind='hist', range = (xmin, xmax), bins = 15, normed = 1, color = 'grey', label = "CV histogram")
        CV['cv_'+title].plot(kind='kde', title="Density Plot of Coefficients of Variation in " + args.group + " " + title, ax=ax, label = "CV density")
        plt.axvline(x=CVcutoff, color = 'red', linestyle = 'dashed', label = "Cutoff at: {0}".format(CVcutoff))
        plt.legend()
        pdfOut.savefig(fig, bbox_inches='tight')
        plt.close(fig)

    fig, ax = plt.subplots()
    xmin = -np.nanpercentile(CV['cv'].values,99)*0.2
    xmax = np.nanpercentile(CV['cv'].values,99)*1.5
    ax.set_xlim(xmin, xmax)

    # Create flag file instance
    CVflag = Flags(index=CV['cv'].index)

    for title, group in dat.design.groupby(args.group):
        CV['cv_'+title].plot(kind='kde', title="Density Plot of Coefficients of Variation by " + args.group, ax=ax, label = "CV density in group "+title)

        # Create new flag row for each group
        CVflag.addColumn(column='flag_feature_big_CV_' + title,
                     mask=((CV['cv_'+title].values > CVcutoff) | CV['cv_'+title].isnull()))

    plt.axvline(x=CVcutoff, color = 'red', linestyle = 'dashed', label = "Cutoff at: {0}".format(CVcutoff))
    plt.legend()
    pdfOut.savefig(fig, bbox_inches='tight')
    plt.close(fig)
    pdfOut.close()

    # Write flag file
    CVflag.df_flags.to_csv(args.CVflag, sep='\t')
Example #24
def normalize_linear(np_array, lower_percentile, upper_percentile):
    lower_bound = np.nanpercentile(np_array, lower_percentile)
    upper_bound = np.nanpercentile(np_array, upper_percentile)

    np_array[np_array < lower_bound] = lower_bound
    np_array[np_array > upper_bound] = upper_bound
    np_array = np_array - lower_bound
    np_array = np_array / (upper_bound - lower_bound)
    return np_array
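A quick usage sketch for normalize_linear (illustrative values; the function clips to the given percentile bounds, then rescales to [0, 1]):

import numpy as np

arr = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 100.0])
print(normalize_linear(arr.copy(), 5, 95))   # the outlier 100.0 is clipped at the 95th percentile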
Example #25
    def get_dataframe(self, attr, measure='mean', sum=False, cum=False):
        """
        :rtype NDFrame
        """
        values = []
        for name in self.names:
            market_periods_by_date = self.periods_by_market_and_date[name]
            values.append([market_periods_by_date[date][attr] for date in self.dates])

        values = array(values)  # shape is names-dates-samples

        if cum:
            # Accumulate over the dates, the second axis.
            # shape is the same: names-dates-samples
            values = values.cumsum(axis=1)

        if sum:
            # Sum over the names, the first axis.
            # shape is dates-samples
            values = values.sum(axis=0)
            pass

        if measure == 'mean':
            values = values.mean(axis=-1)
        elif measure == 'std':
            values = values.std(axis=-1)
        elif measure == 'quantile':
            assert self.confidence_interval is not None
            low_percentile = (100 - self.confidence_interval) / 2.0
            high_percentile = 100 - low_percentile
            mean = values.mean(axis=-1)
            low = mean - nanpercentile(values, q=low_percentile, axis=-1)
            high = nanpercentile(values, q=high_percentile, axis=-1) - mean
            errors = []
            if sum:
                # Need to return 2-len(dates) sized array, for a Series.
                errors.append([low, high])
            else:
                # Need to return len(names)-2-len(dates) sized array, for a DateFrame.
                for i in range(len(self.names)):
                    errors.append([low[i], high[i]])
            values = array(errors)
            return values
        # elif measure == 'direct':
        #     raise NotImplementedError()
        #     if len(values) == 1:
        #         values = values[0]
        #     else:
        #         raise NotImplementedError()
        #     return DataFrame(values, index=dates, columns=names)
        else:
            raise Exception("Measure '{}' not supported".format(measure))

        if sum:
            return Series(values, index=self.dates)
        else:
            return DataFrame(values.T, index=self.dates, columns=self.names)
Example #26
    def _get_power_range(power_dict):
        # Calculate the power data range across each channel
        max_db = {}
        min_db = {}
        for channel, channel_data in power_dict.items():
            all_power_data = np.concatenate(channel_data)
            max_db[channel] = np.nanpercentile(all_power_data, ZPLSPlot.upper_percentile)
            min_db[channel] = np.nanpercentile(all_power_data, ZPLSPlot.lower_percentile)

        return min_db, max_db
Example #27
def stats(arr):
    af = arr.flatten()

    box_bot = np.nanpercentile(af, 25.0)
    box_top = np.nanpercentile(af, 75.0)
    box_center = np.nanpercentile(af, 50.0)  # np.median(af)
    flier_low = np.nanpercentile(af, 0.0)  # np.min(af)
    flier_high = np.nanpercentile(af, 100.0)  # np.max(af)

    return flier_low, box_bot, box_center, box_top, flier_high
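The function above returns the five-number summary used for box plots; a quick check with illustrative data (the NaN is ignored by the percentile calls):

import numpy as np

arr = np.array([[1.0, 2.0, np.nan],
                [3.0, 4.0, 5.0]])
print(stats(arr))   # (min, Q1, median, Q3, max)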
Example #28
def test():
  import glob
  from importlib import reload  # reload is not a builtin in Python 3
  import analysis.experiment as exp
  reload(exp)

  fn = exp.filename()
  print(fn)

  fn = filename(dtype = 'img')
  print(fn)
  print(glob.glob(fn))

  data = exp.load(wid = 0);
  print(data.shape)
  
  import matplotlib.pyplot as plt
  plt.figure(1); plt.clf();
  img = exp.load_img(t=200000);
  plt.imshow(img, cmap = 'gray')
  
  #animate movie  
  import time

  fig, ax = plt.subplots()
  figimg = ax.imshow(img, cmap = 'gray');
  plt.show();
  
  for t in range(200000, 300000):
    figimg.set_data(exp.load_img(t=t));
    ax.set_title('t=%d' % t);
    time.sleep(0.001)
    fig.canvas.draw()
    fig.canvas.flush_events()
    
  reload(exp)
  sbins = exp.stage_bins(wid = [0,1])
  d = exp.load_stage_binned(wid = [0,1], nbins_per_stage=10) 
  
  a = exp.load_aligned(wid = all, align='time', dtype = 'speed')  
  a_th = np.nanpercentile(a, 85);
  a[a> a_th] = a_th;
  a[np.isnan(a)] = -1.0;
  
  import analysis.plot as fplt;
  fplt.plot_array(a)
  
  a = exp.load_aligned(wid = all, align='L2', dtype = 'speed')  
  a_th = np.nanpercentile(a, 85);
  a[a> a_th] = a_th;
  a[np.isnan(a)] = -1.0;
  
  import analysis.plot as fplt;
  fplt.plot_array(a)
  
Example #29
    def contourf_date_lat(self, ax, whichcolumn='wind', updown='up', **kwargs):
        """ A contourf of multiple-day wind versus date and latitude.

        Args:
            ax: axis handle
            whichcolumn: string, 'wind', 'winde', 'windn'.
            updown: string, 'up' or 'down'
            **kwargs: for contourf
        Return:
            hc: handle of the contourf plot
        ----------------------------------------
        Note: x axis is days from '2000-1-1'
        """
        from matplotlib.ticker import AutoMinorLocator
        from scipy.interpolate import griddata
        if not self.empty:
            #self['epochday'] = (self.index-self.index.min())/pd.Timedelta('1D')
            self['epochday'] = (self.index-pd.Timestamp('2000-1-1'))/pd.Timedelta('1D')
            btime = self['epochday'].min()
            etime = self['epochday'].max()

            isup, isdown = mf.updown(self.lat)
            tmp = self[isup] if updown == 'up' else self[isdown]

            ut0 = np.arange(np.floor(btime), np.floor(etime)+1+0.5/24, 0.5/24)
            lat0 = np.arange(-90,91,3)
            ut, lat = np.meshgrid(ut0, lat0)
            windt = griddata((tmp['epochday'], tmp.lat), tmp[whichcolumn], (ut, lat),
                             method='linear', rescale=True)
            for index, k in enumerate(ut0):
                fp = abs(tmp['epochday']-k)<0.5/24
                if not fp.any():
                    windt[:,index]=np.nan
            hc = ax.contourf(
                    ut, lat, windt,
                    levels=np.linspace(np.nanpercentile(windt,1),np.nanpercentile(windt,99),11),
                    **kwargs)
            ax.set_xlim(np.floor(btime),np.floor(etime)+1)
            ax.set_xticks(np.arange(np.floor(btime),np.floor(etime)+2))
            ax.set_xticklabels(
                    pd.date_range(tmp.index[0],tmp.index[-1]+pd.Timedelta('1d')).strftime('%j'))
            ax.set_ylim(-90,90)
            ax.set_yticks(np.arange(-90,91,30))
            ax.xaxis.set_minor_locator(AutoMinorLocator(4))
            ax.yaxis.set_minor_locator(AutoMinorLocator(3))
            ax.tick_params(which='both', width=1.2)
            ax.tick_params(which='major', length=7)
            ax.tick_params(which='minor', length=4)
            ax.set_title('LT: {:.1f}'.format(tmp['LT'].median()))
            ax.set_xlabel('Day of {:d}'
                          .format(tmp.index[0].year),fontsize=14)
            ax.set_ylabel('Latitude', fontsize=14)
            return hc#, windt
Example #30
def LST_compare(day, t, ndi_thres):
    # get matching radiometer time step for flight    
    dateCol = met.SelectTimestep(metTime, flightDates[day][t])
    
    # get meteorological variables
    Rlu = metData['Rl_up_Wm2_EC'][dateCol][0]
    Rld = metData['Rl_down_Wm2_EC'][dateCol][0]

    # calculate LST from radiometer
#        Trad_EC_v1 = ((Rlu - emisAtm * sb * (1 - emis) * Ta**4)/(emis* \
#            sb))**(1./4) - 273.16
    Trad_EC = ((Rlu - (1 - emis)*Rld)/(emis* \
        sb))**(1./4) - 273.16
    
    # get UAV LST data
    image_TIR = 'TEMP_Mosaic_%s_%s_rect_rs03_noV_lowElev' % (t, day)
    im_TIR = os.path.join(direct_TIR, image_TIR + '.tif')
    
    fid_TIR=gdal.Open(im_TIR ,gdal.GA_ReadOnly)    
    
    lst_UAV = fid_TIR.GetRasterBand(1).ReadAsArray() 
    lst_UAV[lst_UAV <= -99] = np.nan
    
    # get NDI data
    image_ndi = 'NDI_%s_%s_rs03_unclip' % (day, t)        
    im_ndi = os.path.join(direct_ndi, image_ndi + '.tif')
    
    fid_ndi=gdal.Open(im_ndi ,gdal.GA_ReadOnly)    
    ndi =fid_ndi.GetRasterBand(1).ReadAsArray()
    ndi[ndi <= ndi_thres] = 0
    ndi_mask = ndi > 0
    
    # mask lst by ndi
    lst_UAV[~ndi_mask] = np.nan
    
    # The 10 % quantile is used for the comparison. To avoid also evaluating the
    # coolboxes, the lowest values (below the 0.5th percentile here) are removed beforehand.
    lower = lst_UAV.flatten()
    without_lowest = lower.copy()
    without_lowest[lower <= np.nanpercentile(lower,0.5)] = np.nan
    low_LST= np.nanpercentile(without_lowest, 10) 
    #low_LST= np.nanpercentile(lower, 10) 
    UAV_LST = np.nanmean(lst_UAV[lst_UAV < low_LST])
    
    LST_UAV.append(round(UAV_LST,1))
    LST_EC.append(round(Trad_EC,1))
    Days.append(day[1:])
    Dates.append(day[1:] + '_' + t[1:])
    compareTrad[day + '_' + t] = { 'UAV' : round(np.nanmean(UAV_LST),1), 
                                    'EC'  : round(Trad_EC,1),
                                    'Day'  : day[1:]}        
    
    return LST_UAV, LST_EC, Days, compareTrad
Example #31
def summarize_values(values,
                     summary_type=None,
                     threshold=5e-2,
                     decimal_count=1,
                     display_n=True):
    s = {}
    total_len = np.nansum([len(values[x]) for x in values])
    for key in values:
        s[key] = ''
        v = values[key]
        if summary_type == 1.0:  # numeric
            data1 = values['Dood - totaal'][~values['Dood - totaal'].isna()]
            data2 = values['Levend ontslagen en niet heropgenomen - totaal'][
                ~values['Levend ontslagen en niet heropgenomen - totaal'].isna(
                )]
            normalresult1 = ss.normaltest(data1)
            normalresult2 = ss.normaltest(data2)
            if normalresult1.pvalue < threshold or normalresult2.pvalue < threshold:
                # not normal: use median
                if len(v) - np.nansum(v.isna()) > 0:
                    n = len(v) - np.nansum(v.isna())
                    median = np.nanmedian(v)
                    iqr1 = np.nanpercentile(v, 25)
                    iqr3 = np.nanpercentile(v, 75)

                    if display_n:
                        s[key] = [
                            format(
                                str(round(median, decimal_count)) + ' (' +
                                str(round(iqr1, decimal_count)) + '-' +
                                str(round(iqr3, decimal_count)) + ')' +
                                '\n(n=' + str(n) + ')')
                        ]
                    else:
                        s[key] = [
                            format(
                                str(round(median, decimal_count)) + ' (' +
                                str(round(iqr1, decimal_count)) + '-' +
                                str(round(iqr3, decimal_count)) + ')')
                        ]
            else:
                # normal: use mean
                if len(v) - np.nansum(v.isna()) > 0:
                    n = len(v) - np.nansum(v.isna())
                    p = (len(v) - np.nansum(v.isna())) / total_len * 100
                    mean = np.nanmean(v)
                    std = np.nanstd(v)

                    if display_n:
                        s[key] = [
                            format(
                                str(round(mean, decimal_count)) + ' ± ' +
                                str(round(std, decimal_count)) + '\n(n=' +
                                str(n) + ')')
                        ]
                    else:
                        s[key] = [
                            format(
                                str(round(mean, decimal_count)) + ' ± ' +
                                str(round(std, decimal_count)))
                        ]
        elif summary_type == 2.0:  # binary
            n = len(v) - np.nansum(
                v.isna())  # total n available for this variable
            if n == 0:
                p = 0
            else:
                p = sum(v == 1) / n * 100  # percentage True
            if display_n:
                s[key] = [format(str(int(p)) + '%' + '\n(n=' + str(n) + ')')]
            else:
                s[key] = [format(str(int(p)) + '%')]

        elif summary_type == 3.0:  # median (IQR)
            n = len(v) - np.nansum(v.isna())
            median = np.nanmedian(v)
            iqr1 = np.nanpercentile(v, 25)
            iqr3 = np.nanpercentile(v, 75)
            s[key] = [
                format(
                    str(round(median, decimal_count)) + ' (' +
                    str(round(iqr1, decimal_count)) + '-' +
                    str(round(iqr3, decimal_count)))
            ]
            if display_n:
                s[key] = [
                    format(
                        str(round(median, decimal_count)) + ' (' +
                        str(round(iqr1, decimal_count)) + '-' +
                        str(round(iqr3, decimal_count)) + '\n(n=' + str(n) +
                        ')')
                ]
            else:
                s[key] = [
                    format(
                        str(round(median, decimal_count)) + ' (' +
                        str(round(iqr1, decimal_count)) + '-' +
                        str(round(iqr3, decimal_count)))
                ]
        elif summary_type == 4.0:
            try:
                n = len(v) - np.nansum(v.isna())
                median = np.nanmedian(v)
                iqr1 = np.nanpercentile(v, 25)
                iqr3 = np.nanpercentile(v, 75)
                s[key] = [
                    format(
                        str(round(median, decimal_count)) + ' (' +
                        str(round(iqr1, decimal_count)) + '-' +
                        str(round(iqr3, decimal_count)))
                ]
                if display_n:
                    s[key] = [
                        format(
                            str(round(median, decimal_count)) + ' (' +
                            str(round(iqr1, decimal_count)) + '-' +
                            str(round(iqr3, decimal_count)) + '\n(n=' +
                            str(n) + ')')
                    ]
                else:
                    s[key] = [
                        format(
                            str(round(median, decimal_count)) + ' (' +
                            str(round(iqr1, decimal_count)) + '-' +
                            str(round(iqr3, decimal_count)))
                    ]
            except Exception:
                v = [float(v1) for v1 in v if v1 is not None]
                n = len(v) - np.sum(np.isnan(v))
                median = np.nanmedian(v)
                iqr1 = np.nanpercentile(v, 25)
                iqr3 = np.nanpercentile(v, 75)
                if display_n:
                    s[key] = [
                        format(
                            str(round(median, decimal_count)) + ' (' +
                            str(round(iqr1, decimal_count)) + '-' +
                            str(round(iqr3, decimal_count)) + '\n(n=' +
                            str(n) + ')')
                    ]
                else:
                    s[key] = [
                        format(
                            str(round(median, decimal_count)) + ' (' +
                            str(round(iqr1, decimal_count)) + '-' +
                            str(round(iqr3, decimal_count)))
                    ]

        elif summary_type is None or summary_type == 'n_percn_meansd_medianiqr':
            if len(v) - np.nansum(v.isna()) > 0:
                n = len(v) - np.nansum(v.isna())
                p = (len(v) - np.nansum(v.isna())) / total_len * 100
                mean = np.nanmean(v)
                std = np.nanstd(v)
                median = np.nanmedian(v)
                iqr1 = np.nanpercentile(v, 25)
                iqr3 = np.nanpercentile(v, 75)
                s[key] = [
                    format('n = ' + str(n) + '\n' + str(int(p)) + '%\n' +
                           str(round(mean, decimal_count)) + ' ± ' +
                           str(round(std, decimal_count)) + '\n' +
                           str(round(median, decimal_count)) + ' (' +
                           str(round(iqr1, decimal_count)) + '-' +
                           str(round(iqr3, decimal_count)) + ')')
                ]
            elif len(v) - np.nansum(v.isna()) == 0:
                n = len(v) - np.nansum(v.isna())
                p = (len(v) - np.nansum(v.isna())) / total_len * 100
                s[key] = [format('n = ' + str(n) + ';\n' + str(int(p)) + '%')]
            else:
                s[key] = ['n/a']
    return s
Example #32
def summary_plot(shap_values,
                 features,
                 feature_names=None,
                 max_display=20,
                 plot_type="dot",
                 color="#ff0052",
                 axis_color="#333333",
                 title=None,
                 alpha=1,
                 show=True,
                 sort=True):
    """
    Create a SHAP summary plot, colored by feature values when they are provided.

    Parameters
    ----------
    shap_values : numpy.array
        Matrix of SHAP values (# samples x # features)

    features : numpy.array or pandas.DataFrame or list
        Matrix of feature values (# samples x # features) or a feature_names list as shorthand

    feature_names : list
        Names of the features (length # features)

    max_display : int
        How many top features to include in the plot

    plot_type : "dot" (default) or "violin"
        What type of summary plot to produce
    """

    # convert from a DataFrame or other types
    if str(type(features)) == "<class 'pandas.core.frame.DataFrame'>":
        if feature_names is None:
            feature_names = features.columns
        features = features.values
    elif str(type(features)) == "<class 'list'>":
        if feature_names is None:
            feature_names = features
        features = None
    elif len(features.shape) == 1 and feature_names is None:
        feature_names = features
        features = None

    if sort:
        # order features by the sum of their effect magnitudes
        feature_order = np.argsort(np.sum(np.abs(shap_values), axis=0)[:-1])
        feature_order = feature_order[-min(max_display, len(feature_order)):]
    else:
        feature_order = np.flip(
            np.arange(min(max_display, shap_values.shape[1] - 1)), 0)

    row_height = 0.4
    pl.gcf().set_size_inches(7, len(feature_order) * row_height + 0.6)
    pl.axvline(x=0, color="#999999", zorder=-1)

    if plot_type == "dot":
        for pos, i in enumerate(feature_order):
            pl.axhline(y=pos,
                       color="#cccccc",
                       lw=0.5,
                       dashes=(1, 5),
                       zorder=-1)
            shaps = shap_values[:, i]
            N = len(shaps)
            hspacing = (np.max(shaps) - np.min(shaps)) / 200
            curr_bin = []
            nbins = 100
            quant = np.round(nbins * (shap_values[:, i] - np.min(shaps)) /
                             (np.max(shaps) - np.min(shaps) + 1e-8))
            inds = np.argsort(quant + np.random.randn(N) * 1e-6)
            layer = 0
            last_bin = -1
            ys = np.zeros(N)
            for ind in inds:
                if quant[ind] != last_bin:
                    layer = 0
                ys[ind] = layer * ((layer % 2) * 2 - 1)
                layer += 1
                last_bin = quant[ind]
            ys *= row_height / np.max(ys + 1)

            if features is not None:
                vmin = np.nanpercentile(features[:, i], 5)
                vmax = np.nanpercentile(features[:, i], 95)
                assert features.shape[0] == len(
                    shaps
                ), "Feature and SHAP matrices must have the same number of rows!"
                pl.scatter(shaps,
                           pos + ys,
                           cmap=red_blue,
                           vmin=vmin,
                           vmax=vmax,
                           s=16,
                           c=np.nan_to_num(features[:, i]),
                           alpha=alpha,
                           linewidth=0,
                           zorder=3)
            else:
                pl.scatter(shaps,
                           pos + ys,
                           s=16,
                           alpha=alpha,
                           linewidth=0,
                           zorder=3,
                           color=color)

    elif plot_type == "violin":
        for pos, i in enumerate(feature_order):
            pl.axhline(y=pos,
                       color="#cccccc",
                       lw=0.5,
                       dashes=(1, 5),
                       zorder=-1)

        if features is not None:
            global_low = np.nanpercentile(
                shap_values[:, :len(feature_names)].flatten(), 1)
            global_high = np.nanpercentile(
                shap_values[:, :len(feature_names)].flatten(), 99)
            for pos, i in enumerate(feature_order):
                shaps = shap_values[:, i]
                shap_min, shap_max = np.min(shaps), np.max(shaps)
                rng = shap_max - shap_min
                xs = np.linspace(
                    np.min(shaps) - rng * 0.2,
                    np.max(shaps) + rng * 0.2, 100)
                if np.std(shaps) < (global_high - global_low) / 100:
                    ds = gaussian_kde(shaps + np.random.randn(len(shaps)) *
                                      (global_high - global_low) / 100)(xs)
                else:
                    ds = gaussian_kde(shaps)(xs)
                ds /= np.max(ds) * 3

                values = features[:, i]
                window_size = max(10, len(values) // 20)
                smooth_values = np.zeros(len(xs) - 1)
                for j in range(len(xs) - 1):
                    smooth_values[j] = np.mean(
                        values[max(0, j -
                                   window_size):min(len(xs), j + window_size)])

                vmin = np.nanpercentile(values, 5)
                vmax = np.nanpercentile(values, 95)
                # smooth_values -= np.nanpercentile(smooth_values, 5)
                # smooth_values /= np.nanpercentile(smooth_values, 95)
                smooth_values -= vmin
                smooth_values /= vmax - vmin
                for i in range(len(xs) - 1):
                    if ds[i] > 0.05 or ds[i + 1] > 0.05:
                        pl.fill_between([xs[i], xs[i + 1]],
                                        [pos + ds[i], pos + ds[i + 1]],
                                        [pos - ds[i], pos - ds[i + 1]],
                                        color=red_blue(smooth_values[i]),
                                        zorder=2)

                vmin = np.nanpercentile(values, 5)
                vmax = np.nanpercentile(values, 95)
                pl.scatter(shaps,
                           np.ones(shap_values.shape[0]) * pos,
                           s=9,
                           cmap=red_blue,
                           vmin=vmin,
                           vmax=vmax,
                           c=values,
                           alpha=alpha,
                           linewidth=0,
                           zorder=3)

        else:
            parts = pl.violinplot(shap_values[:, feature_order],
                                  range(len(feature_order)),
                                  points=200,
                                  vert=False,
                                  widths=0.7,
                                  showmeans=False,
                                  showextrema=False,
                                  showmedians=False)

            for pc in parts['bodies']:
                pc.set_facecolor(color)
                pc.set_edgecolor('none')
                pc.set_alpha(alpha)

    pl.gca().xaxis.set_ticks_position('bottom')
    pl.gca().yaxis.set_ticks_position('none')
    pl.gca().spines['right'].set_visible(False)
    pl.gca().spines['top'].set_visible(False)
    pl.gca().spines['left'].set_visible(False)
    pl.gca().tick_params(color=axis_color, labelcolor=axis_color)
    pl.yticks(range(len(feature_order)),
              [feature_names[i] for i in feature_order],
              fontsize=13)
    pl.gca().tick_params('y', length=20, width=0.5, which='major')
    pl.gca().tick_params('x', labelsize=11)
    pl.ylim(-1, len(feature_order))
    pl.xlabel("SHAP value (impact on model output)", fontsize=13)
    pl.tight_layout()
    if show: pl.show()
Example #33
def GetPoissonEstimates(bins, SNFinalPos, SNFinalNeg, LimitN, MinSN):

    ProbPoisson = []
    ProbPoissonE1 = []
    ProbPoissonE2 = []
    ProbNegativeOverPositive = []
    ProbNegativeOverPositiveE1 = []
    ProbNegativeOverPositiveE2 = []
    ProbPoissonExpected = []
    ProbPoissonExpectedE1 = []
    ProbPoissonExpectedE2 = []
    ProbNegativeOverPositiveDif = []
    ProbNegativeOverPositiveDifE1 = []
    ProbNegativeOverPositiveDifE2 = []
    PurityPoisson = []
    Nnegative = []
    NnegativeReal = []
    NPositive = []
    Nnegative_e1 = []
    Nnegative_e2 = []

    for sn in bins:
        if len(SNFinalPos[SNFinalPos >= sn]) > 0:
            Fraction, FractionE1, FractionE2 = GetPoissonErrorGivenMeasurements(
                len(SNFinalNeg[SNFinalNeg >= sn]),
                len(SNFinalPos[SNFinalPos >= sn]))

            if Fraction > 1.0:
                Fraction = 1.0
                FractionE1 = 0.0
                FractionE2 = 0.0
            else:
                pass

            ProbNegativeOverPositive.append(Fraction)
            ProbNegativeOverPositiveE1.append(FractionE1)
            ProbNegativeOverPositiveE2.append(FractionE2)
        elif len(SNFinalNeg[SNFinalNeg >= sn]) > 0:
            ProbNegativeOverPositive.append(1.0)
            ProbNegativeOverPositiveE1.append(0.0)
            ProbNegativeOverPositiveE2.append(0.0)
        else:
            ProbNegativeOverPositive.append(0.0)
            ProbNegativeOverPositiveE1.append(0.0)
            ProbNegativeOverPositiveE2.append(0.0)

        if len(SNFinalPos[(SNFinalPos >= sn) & (SNFinalPos < sn + 0.1)]) > 0:
            Fraction, FractionE1, FractionE2 = GetPoissonErrorGivenMeasurements(
                len(SNFinalNeg[(SNFinalNeg >= sn) & (SNFinalNeg < sn + 0.1)]),
                len(SNFinalPos[(SNFinalPos >= sn) & (SNFinalPos < sn + 0.1)]))
            if Fraction > 1.0:
                Fraction = 1.0
                FractionE1 = 0.0
                FractionE2 = 0.0
            else:
                pass

            ProbNegativeOverPositiveDif.append(min(1.0, Fraction))
            ProbNegativeOverPositiveDifE1.append(FractionE1)
            ProbNegativeOverPositiveDifE2.append(FractionE2)
        elif len(SNFinalNeg[(SNFinalNeg >= sn) & (SNFinalNeg < sn + 0.1)]) > 0:
            ProbNegativeOverPositiveDif.append(1.0)
            ProbNegativeOverPositiveDifE1.append(0.0)
            ProbNegativeOverPositiveDifE2.append(0.0)
        else:
            ProbNegativeOverPositiveDif.append(0.0)
            ProbNegativeOverPositiveDifE1.append(0.0)
            ProbNegativeOverPositiveDifE2.append(0.0)

        k = len(SNFinalNeg[SNFinalNeg >= sn])
        aux = scipy.special.gammaincinv(k + 1, [0.16, 0.5, 0.84])
        NnegativeReal.append(k)
        Nnegative.append(aux[1])
        Nnegative_e1.append(aux[1] - aux[0])
        Nnegative_e2.append(aux[2] - aux[1])
        NPositive.append(1.0 * len(SNFinalPos[SNFinalPos >= sn]))

    Nnegative = np.array(Nnegative)
    NPositive = np.array(NPositive)
    NnegativeReal = np.array(NnegativeReal)
    Nnegative_e1 = np.array(Nnegative_e1)
    Nnegative_e2 = np.array(Nnegative_e2)

    MinSNtoFit = min(bins)
    UsableBins = len(
        Nnegative[bins >= MinSNtoFit][Nnegative[bins >= MinSNtoFit] > LimitN])

    AuxiliarOutput = open('SN_UsedInFit.dat', 'w')
    print('Min SN to do the fit:', round(MinSNtoFit, 1),
          ', Number of usable bins:', UsableBins)
    AuxiliarOutput.write(
        str(round(MinSNtoFit, 1)) + ' ' + str(UsableBins) + '\n')
    if UsableBins < 6:
        print('*** We are using ', UsableBins,
              ' points for the fitting of the negative counts ***')
        print(
            '*** We usually get good results with 6 points, try reducing the parameter -MinSN ***'
        )
    while UsableBins > 6:
        MinSNtoFit = MinSNtoFit + 0.1
        UsableBins = len(Nnegative[bins >= MinSNtoFit][
            Nnegative[bins >= MinSNtoFit] > LimitN])
        print('Min SN to do the fit:', round(MinSNtoFit, 1),
              ', Number of usable bins:', UsableBins)
        AuxiliarOutput.write(
            str(round(MinSNtoFit, 1)) + ' ' + str(UsableBins) + '\n')

        if MinSNtoFit > max(bins):
            print('No negative points to do the fit')
            exit()
    AuxiliarOutput.close()

    if UsableBins >= 3:
        try:
            # popt, pcov = curve_fit(NegativeRate, bins[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN], Nnegative[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],p0=[1e6,1])
            popt, pcov = curve_fit(
                NegativeRateLog,
                bins[bins >= MinSNtoFit][
                    Nnegative[bins >= MinSNtoFit] > LimitN],
                np.log10(Nnegative[bins >= MinSNtoFit][
                    Nnegative[bins >= MinSNtoFit] > LimitN]),
                p0=[1e6, 1],
                sigma=np.log10(
                    np.average([
                        Nnegative_e1[bins >= MinSNtoFit][
                            Nnegative[bins >= MinSNtoFit] > LimitN],
                        Nnegative_e2[bins >= MinSNtoFit][
                            Nnegative[bins >= MinSNtoFit] > LimitN]
                    ],
                               axis=0)),
                absolute_sigma=False)

            perr = np.sqrt(np.diag(pcov))
            # print popt,popt/perr,not np.isfinite(perr[0])
            CounterFitTries = 0
            while not np.isfinite(perr[0]):
                print('*** curve_fit failed to converge ... ***')
                NewParameter1 = np.power(10, np.random.uniform(1, 9))
                NewParameter2 = np.random.uniform(0.1, 2.0)
                print('*** New Initial Estimates for the fitting (random):',
                      round(NewParameter1), round(NewParameter2, 2), ' ***')
                # popt, pcov = curve_fit(NegativeRate, bins[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN], Nnegative[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],p0=[NewParameter1,NewParameter2])
                popt, pcov = curve_fit(
                    NegativeRateLog,
                    bins[bins >= MinSNtoFit][
                        Nnegative[bins >= MinSNtoFit] > LimitN],
                    np.log10(Nnegative[bins >= MinSNtoFit][
                        Nnegative[bins >= MinSNtoFit] > LimitN]),
                    p0=[NewParameter1, NewParameter2],
                    sigma=np.log10(
                        np.average([
                            Nnegative_e1[bins >= MinSNtoFit][
                                Nnegative[bins >= MinSNtoFit] > LimitN],
                            Nnegative_e2[bins >= MinSNtoFit][
                                Nnegative[bins >= MinSNtoFit] > LimitN]
                        ],
                                   axis=0)),
                    absolute_sigma=False)
                perr = np.sqrt(np.diag(pcov))
                print('*** New Results: N:', round(popt[0]), ' +/- ',
                      round(perr[0]), ' Sigma:', round(popt[1], 2), ' +/- ',
                      round(perr[1], 2), ' ***')
                CounterFitTries += 1
                if CounterFitTries > 100:
                    print('*** Over 100 attempts and no good fit *** ')
                    break

        except:
            print('Fitting failed for LimitN:' + str(LimitN) + ' and ' +
                  str(MinSN) + '... Will force LimitN=0')
            # popt, pcov = curve_fit(NegativeRate, bins[Nnegative>0], Nnegative[Nnegative>0],p0=[1e6,1])
            popt, pcov = curve_fit(NegativeRateLog,
                                   bins[Nnegative > 0],
                                   np.log10(Nnegative[Nnegative > 0]),
                                   p0=[1e6, 1],
                                   sigma=np.log10(
                                       np.average([
                                           Nnegative_e1[Nnegative > 0],
                                           Nnegative_e2[Nnegative > 0]
                                       ],
                                                  axis=0)),
                                   absolute_sigma=False)
            perr = np.sqrt(np.diag(pcov))
            # print popt,popt/perr,not np.isfinite(perr[0])
            CounterFitTries = 0
            while not np.isfinite(perr[0]):
                print('*** curve_fit failed to converge ... ***')
                NewParameter1 = np.power(10, np.random.uniform(1, 9))
                NewParameter2 = np.random.uniform(0.1, 2.0)
                print('*** New Initial Estimates for the fitting (random):',
                      round(NewParameter1), round(NewParameter2, 2), ' ***')
                # popt, pcov = curve_fit(NegativeRate, bins[Nnegative>0], Nnegative[Nnegative>0],p0=[NewParameter1,NewParameter2])
                popt, pcov = curve_fit(NegativeRateLog,
                                       bins[Nnegative > 0],
                                       np.log10(Nnegative[Nnegative > 0]),
                                       p0=[NewParameter1, NewParameter2],
                                       sigma=np.log10(
                                           np.average([
                                               Nnegative_e1[Nnegative > 0],
                                               Nnegative_e2[Nnegative > 0]
                                           ],
                                                      axis=0)),
                                       absolute_sigma=False)
                perr = np.sqrt(np.diag(pcov))
                print('*** New Results: N:', round(popt[0]), ' +/- ',
                      round(perr[0]), ' Sigma:', round(popt[1], 2), ' +/- ',
                      round(perr[1], 2), ' ***')
                CounterFitTries += 1
                if CounterFitTries > 100:
                    print('*** Over 100 attempts and no good fit *** ')
                    break
    else:
        print('Number of usable bins is less than 3 for LimitN:' +
              str(LimitN) + ' and ' + str(MinSN) + '... Will force LimitN=0')
        # popt, pcov = curve_fit(NegativeRate, bins[Nnegative>0], Nnegative[Nnegative>0],p0=[1e6,1])
        popt, pcov = curve_fit(
            NegativeRateLog,
            bins[Nnegative > 0],
            np.log10(Nnegative[Nnegative > 0]),
            p0=[1e6, 1],
            sigma=np.log10(
                np.average(
                    [Nnegative_e1[Nnegative > 0], Nnegative_e2[Nnegative > 0]],
                    axis=0)),
            absolute_sigma=False)
        perr = np.sqrt(np.diag(pcov))
        # print popt,popt/perr,not np.isfinite(perr[0])
        CounterFitTries = 0
        while not np.isfinite(perr[0]):
            print('*** curve_fit failed to converge ... ***')
            NewParameter1 = np.power(10, np.random.uniform(1, 9))
            NewParameter2 = np.random.uniform(0.1, 2.0)
            print('*** New Initial Estimates for the fitting (random):',
                  round(NewParameter1), round(NewParameter2, 2), ' ***')
            # popt, pcov = curve_fit(NegativeRate, bins[Nnegative>0], Nnegative[Nnegative>0],p0=[NewParameter1,NewParameter2])
            popt, pcov = curve_fit(NegativeRateLog,
                                   bins[Nnegative > 0],
                                   np.log10(Nnegative[Nnegative > 0]),
                                   p0=[NewParameter1, NewParameter2],
                                   sigma=np.log10(
                                       np.average([
                                           Nnegative_e1[Nnegative > 0],
                                           Nnegative_e2[Nnegative > 0]
                                       ],
                                                  axis=0)),
                                   absolute_sigma=False)
            perr = np.sqrt(np.diag(pcov))
            print('*** New Results: N:', round(popt[0]), ' +/- ',
                  round(perr[0]), ' Sigma:', round(popt[1], 2), ' +/- ',
                  round(perr[1], 2), ' ***')
            CounterFitTries += 1
            if CounterFitTries > 100:
                print('*** Over 100 attempts and no good fit *** ')
                break

    NegativeFitted = NegativeRate(bins, popt[0], popt[1])
    SNPeakGaussian = (popt / np.sqrt(np.diag(pcov)))[0]
    # print 'SNPeakGaussian',SNPeakGaussian,popt,np.sqrt(np.diag(pcov))
    # print curve_fit(NegativeRate, bins[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN], Nnegative[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],p0=[1e6,1],sigma=np.average([Nnegative_e1[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],Nnegative_e2[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN]],axis=0),absolute_sigma=False)
    # print curve_fit(NegativeRateLog,
    # 				bins[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],
    # 				np.log10(Nnegative[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN]),
    # 				p0=[1e6,1],
    # 				sigma=np.log10(np.average([Nnegative_e1[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN],Nnegative_e2[bins>=MinSNtoFit][Nnegative[bins>=MinSNtoFit]>LimitN]],axis=0)),
    # 				absolute_sigma=False)

    for i in range(len(bins)):
        aux = []
        auxExpected = []
        for j in range(1000):
            lamb = np.random.normal(NegativeFitted[i],
                                    NegativeFitted[i] / SNPeakGaussian)
            while lamb < 0:
                lamb = np.random.normal(NegativeFitted[i],
                                        NegativeFitted[i] / SNPeakGaussian)
            aux.append(1 - scipy.special.gammaincc(0 + 1, lamb))
            if i == len(bins) - 1:
                if NPositive[i] > 0:
                    auxExpected.append(1.0 - max(0, NPositive[i] - lamb) /
                                       NPositive[i])
                else:
                    auxExpected.append(0.0)
            else:
                # lamb2 = lamb - np.random.normal(NegativeFitted[i+1],NegativeFitted[i+1]/SNPeakGaussian)
                lamb2 = (NegativeFitted[i] -
                         NegativeFitted[i + 1]) * lamb / NegativeFitted[i]
                while lamb2 < 0:
                    lamb2 = lamb - np.random.normal(
                        NegativeFitted[i + 1],
                        NegativeFitted[i + 1] / SNPeakGaussian)
                if (NPositive[i] - NPositive[i + 1]) > 0:
                    auxExpected.append(
                        1.0 -
                        max(0, (NPositive[i] - NPositive[i + 1]) - lamb2) /
                        (NPositive[i] - NPositive[i + 1]))
                else:
                    auxExpected.append(0.0)
                    # auxExpected.append(1.0-max(0,0.7 - lamb2)/0.7)

        PP = np.nanpercentile(aux, [16, 50, 84])
        PPExpected = np.nanpercentile(auxExpected, [16, 50, 84])
        ProbPoisson.append(PP[1])
        ProbPoissonE1.append(PP[1] - PP[0])
        ProbPoissonE2.append(PP[2] - PP[1])

        ProbPoissonExpected.append(PPExpected[1])
        ProbPoissonExpectedE1.append(PPExpected[1] - PPExpected[0])
        ProbPoissonExpectedE2.append(PPExpected[2] - PPExpected[1])
        # if i<len(bins)-1:
        # 	print bins[i],PPExpected,NegativeFitted[i],NPositive[i],NPositive[i+1]
        if NPositive[i] > 0:
            PurityPoisson.append(
                max((NPositive[i] - NegativeFitted[i]) / NPositive[i], 0))
        else:
            PurityPoisson.append(0.0)

    ProbPoisson = np.array(ProbPoisson)
    ProbPoissonE1 = np.array(ProbPoissonE1)
    ProbPoissonE2 = np.array(ProbPoissonE2)
    ProbNegativeOverPositive = np.array(ProbNegativeOverPositive)
    ProbNegativeOverPositiveE1 = np.array(ProbNegativeOverPositiveE1)
    ProbNegativeOverPositiveE2 = np.array(ProbNegativeOverPositiveE2)
    ProbNegativeOverPositiveDif = np.array(ProbNegativeOverPositiveDif)
    ProbNegativeOverPositiveDifE1 = np.array(ProbNegativeOverPositiveDifE1)
    ProbNegativeOverPositiveDifE2 = np.array(ProbNegativeOverPositiveDifE2)
    ProbPoissonExpected = np.array(ProbPoissonExpected)
    ProbPoissonExpectedE1 = np.array(ProbPoissonExpectedE1)
    ProbPoissonExpectedE2 = np.array(ProbPoissonExpectedE2)
    PurityPoisson = np.array(PurityPoisson)

    output = [
        bins, ProbPoisson, ProbNegativeOverPositive, PurityPoisson, NPositive,
        Nnegative, Nnegative_e1, Nnegative_e2, NegativeFitted, NnegativeReal,
        ProbPoissonE1, ProbPoissonE2, ProbNegativeOverPositiveE1,
        ProbNegativeOverPositiveE2, ProbNegativeOverPositiveDif,
        ProbNegativeOverPositiveDifE1, ProbNegativeOverPositiveDifE2,
        ProbPoissonExpected, ProbPoissonExpectedE1, ProbPoissonExpectedE2
    ]

    return output
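# Note: NegativeRate and NegativeRateLog are used above but are not shown in
# this excerpt. Below is a minimal sketch, assuming a Gaussian-noise
# peak-count model whose two parameters match the names printed in the fit
# diagnostics (N, Sigma); the actual functional form in the original code may
# differ.
import numpy as np
from scipy.special import erfc

def NegativeRate(sn, N0, Sigma):
    # Expected cumulative number of noise peaks at or above a given S/N,
    # for Gaussian noise of width Sigma scaled by an amplitude N0.
    return N0 * 0.5 * erfc(sn / (Sigma * np.sqrt(2.0)))

def NegativeRateLog(sn, N0, Sigma):
    # Same model in log10 space, matching the curve_fit calls above.
    return np.log10(NegativeRate(sn, N0, Sigma))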
Exemple #34
0
 def cutoff(self, recorded):
     if not recorded:
         return None
     return np.nanpercentile(list(recorded.values()),
                             (1 - 1 / self.rf) * 100)
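# A hedged usage sketch for the cutoff method above: `recorded` is assumed to
# be a dict of scores and `rf` a divisor such that roughly the top 1/rf of the
# scores fall above the returned threshold. The Selector class is hypothetical.
import numpy as np

class Selector:
    def __init__(self, rf):
        self.rf = rf

    def cutoff(self, recorded):
        if not recorded:
            return None
        return np.nanpercentile(list(recorded.values()),
                                (1 - 1 / self.rf) * 100)

scores = {"a": 0.1, "b": 0.7, "c": 0.4, "d": 0.9}
print(Selector(rf=4).cutoff(scores))  # 75th percentile of the recorded scores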
def plot_representational_similarity(rs, dims=None, dim_labels=None, colors=None, dim_order=None, labels=True):
    if np.all(np.isnan(rs)):
        return # if rs is all NaN (happens with only 1 cell), there is nothing to plot
    if dim_order is not None:
        rsr = np.arange(len(rs)).reshape(*map(len,dims))
        rsrt = rsr.transpose(dim_order)
        ri = rsrt.flatten()
        rs = rs[ri,:][:,ri]

        dims = np.array(dims)[dim_order]
        colors = np.array(colors)[dim_order]
        dim_labels = np.array(dim_labels)[dim_order]
    
    # force the color map to be centered at zero
    clim = np.nanpercentile(rs, [5.0,95.0], axis=None)
    vrange = max(abs(clim[0]), abs(clim[1]))

    rs = rs.copy()
    np.fill_diagonal(rs, np.nan)
 
    if labels:
        grid = ImageGrid(plt.gcf(), 111,
                         nrows_ncols=(1,1),
                         cbar_location="right",
                         cbar_mode="single",
                         cbar_size="7%",
                         cbar_pad=0.05)
        
        for ax in grid: pass
    else:
        ax = plt.gca()

    im = ax.imshow(rs, interpolation='nearest', cmap='RdBu_r', vmin=-vrange, vmax=vrange)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_xticks([])
    ax.set_yticks([])

    if labels:
        cbar = ax.cax.colorbar(im)
        cbar.set_label_text('stimulus correlation')
    
    if dims is not None:
        dim_labels = ["%s(%s)" % (dim_labels[i],', '.join(map(float_label, dims[i].tolist()))) for i in range(len(dims)) ]
        dim_handlers = [ DimensionPatchHandler(dims[i], colors[i], 'w') for i in range(len(dims)) ]

        n = len(rs)
        for cell_i in range(n):
            idx = np.unravel_index(cell_i, tuple(map(len, dims)))  # shape must be a concrete tuple in Python 3

            start = -(len(dims))*2
            width = 1.8
            for dim_i, color in enumerate(colors):
                v_i = idx[dim_i]
                rgb = dim_handlers[dim_i].dim_color(v_i)
                r = mpatches.Rectangle((start + dim_i * width, cell_i-.5), 
                                       width, 1.2, 
                                       facecolor=rgb, linewidth=0)
                r.set_clip_on(False)
                ax.add_patch(r)

                r = mpatches.Rectangle((cell_i-.5, start + dim_i * width), 
                                       1.2, width,
                                       facecolor=rgb, linewidth=0)
                r.set_clip_on(False)
                ax.add_patch(r)

        if labels:
            patches = [ mpatches.Patch(label=dim_labels[i]) for i in range(len(dims)) ]
            ax.legend(handles=patches, 
                      handler_map=dict(zip(patches,dim_handlers)),
                      loc='upper left',
                      bbox_to_anchor=(0,0),
                      ncol=2,
                      fontsize=9,
                      frameon=False)

    if labels:
        plt.subplots_adjust(left=0.07,
                            right=.88,
                            wspace=0.0, hspace=0.0)
Exemple #36
0
def plot_budget(Gvel, buddif, buddiv3, budadv3, Utemp, Vtemp, spath, ts):
    """
    Diagnostic figures for the budget - projected properly
    """
    f = plt.figure(figsize=[9, 9])
    # dtarg = dt.datetime(2015,1,1)
    # t_p1 = B1.condy.get_index(dtarg)
    # t_p2 = B2.condy.get_index(dtarg)
    # t_p3 = B3.condy.get_index(dtarg)
    vlim = 0.4
    a_no = 30
    #     a_sc = 3.9e-1
    # a_sc = 2*np.nanmax(np.hypot(Utemp,Vtemp))
    #     print(a_sc)
    a_sc = 2.5 * np.nanpercentile(np.hypot(Utemp, Vtemp), [90])[0]
    m = Gvel.mplot
    p_rng = np.nanpercentile(buddif, [2, 98])
    pr = np.max(np.abs(p_rng))
    p_rng = [-pr, pr]

    ### intensification
    plt.subplot(2, 2, 1)
    m.pcolormesh(Gvel.xptp, Gvel.yptp, buddif, cmap='RdBu', rasterized=True)
    m.colorbar(location='bottom')
    plt.clim(p_rng)
    m.drawcoastlines()
    plt.title('Intensification ' + ts.strftime('%Y%m%d'))

    ### DIVERGENCE
    plt.subplot(2, 2, 2)
    rm = int(Gvel.m / a_no)
    rn = int(Gvel.n / a_no)
    ra = np.sqrt(rm + rn)
    ra = ra * a_sc
    m.pcolormesh(Gvel.xptp, Gvel.yptp, buddiv3, cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    m.colorbar(location='bottom')
    ur, vr = Gvel.rotate_vectors_to_plot(Utemp, Vtemp)
    m.quiver(Gvel.xpts[::rm, ::rn],
             Gvel.ypts[::rm, ::rn],
             ur[::rm, ::rn],
             vr[::rm, ::rn],
             scale=ra,
             width=0.005)
    m.drawcoastlines()
    plt.title('Divergence ' + ts.strftime('%Y%m%d'))

    ### ADVECTION
    plt.subplot(2, 2, 3)
    rm = int(Gvel.m / a_no)
    rn = int(Gvel.n / a_no)
    ra = np.sqrt(rm + rn)
    ra = ra * a_sc
    m.pcolormesh(Gvel.xptp, Gvel.yptp, budadv3, cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    m.colorbar(location='bottom')
    m.quiver(Gvel.xpts[::rm, ::rn],
             Gvel.ypts[::rm, ::rn],
             ur[::rm, ::rn],
             vr[::rm, ::rn],
             scale=ra,
             width=0.005)
    m.drawcoastlines()
    plt.title('Advection ' + ts.strftime('%Y%m%d'))

    ### intensification
    plt.subplot(2, 2, 4)
    m.pcolormesh(Gvel.xptp,
                 Gvel.yptp,
                 buddif - buddiv3 - budadv3,
                 cmap='RdBu',
                 rasterized=True)
    plt.clim(p_rng)
    m.colorbar(location='bottom')
    m.drawcoastlines()
    plt.title('Residual ' + ts.strftime('%Y%m%d'))

    f.savefig(spath + 'Budget_components_' + ts.strftime('%Y%m%d') + '.pdf',
              bbox_inches='tight')
    print('Saving figure: ' + spath + 'Budget_components_' +
          ts.strftime('%Y%m%d') + '.pdf')
Exemple #37
0
def plot_budget_square(Gvel, buddif, buddiv3, budadv3, Utemp, Vtemp, spath,
                       ts):
    """
    Diagnostic figures for the budget - square grid
    """
    f = plt.figure(figsize=[9, 9])
    # dtarg = dt.datetime(2015,1,1)
    # t_p1 = B1.condy.get_index(dtarg)
    # t_p2 = B2.condy.get_index(dtarg)
    # t_p3 = B3.condy.get_index(dtarg)
    vlim = 0.4
    a_no = 20
    #     a_sc = 3.9e-1
    #     a_sc = 2*np.nanmax(np.hypot(Utemp,Vtemp))
    a_sc = 2.5 * np.nanpercentile(np.hypot(Utemp, Vtemp), [90])[0]
    #     print(a_sc)
    p_rng = np.nanpercentile(buddif, [2, 98])
    pr = np.max(np.abs(p_rng))
    p_rng = [-pr, pr]

    Gvel.get_square_points()

    ### intensification
    plt.subplot(2, 2, 1)
    plt.pcolormesh(Gvel.xsq, Gvel.ysq, buddif, cmap='RdBu', rasterized=True)
    plt.colorbar(orientation="horizontal")
    plt.clim(p_rng)
    plt.title('Intensification ' + ts.strftime('%Y%m%d'))

    ### DIVERGENCE
    plt.subplot(2, 2, 2)
    rm = int(Gvel.m / a_no)
    rn = int(Gvel.n / a_no)
    ra = np.sqrt(rm + rn)
    ra = ra * a_sc
    plt.pcolormesh(Gvel.xsq, Gvel.ysq, buddiv3, cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    plt.colorbar(orientation="horizontal")
    plt.quiver(Gvel.xsq[::rm, ::rn],
               Gvel.ysq[::rm, ::rn],
               Utemp[::rm, ::rn],
               Vtemp[::rm, ::rn],
               scale=ra,
               width=0.005)
    plt.title('Divergence ' + ts.strftime('%Y%m%d'))

    ### ADVECTION
    plt.subplot(2, 2, 3)
    plt.pcolormesh(Gvel.xsq, Gvel.ysq, budadv3, cmap='RdBu', rasterized=True)
    plt.clim(p_rng)
    plt.colorbar(orientation="horizontal")
    plt.quiver(Gvel.xsq[::rm, ::rn],
               Gvel.ysq[::rm, ::rn],
               Utemp[::rm, ::rn],
               Vtemp[::rm, ::rn],
               scale=ra,
               width=0.005)
    plt.title('Advection ' + ts.strftime('%Y%m%d'))

    ### intensification
    plt.subplot(2, 2, 4)
    plt.pcolormesh(Gvel.xsq,
                   Gvel.ysq,
                   buddif - buddiv3 - budadv3,
                   cmap='RdBu',
                   rasterized=True)
    plt.clim(p_rng)
    plt.colorbar(orientation="horizontal")
    plt.title('Residual ' + ts.strftime('%Y%m%d'))

    f.savefig(spath + 'Budget_components_square_' + ts.strftime('%Y%m%d') +
              '.pdf',
              bbox_inches='tight')
    print('Saving figure: ' + spath + 'Budget_components_square_' +
          ts.strftime('%Y%m%d') + '.pdf')
def make_aperture_image(label, filter_list,
                            center_ra, center_dec, major_diam, minor_diam, pos_angle):
    """
    Make a picture of the galaxy with the apertures overlaid

    Currently just does one given aperture, but should eventually do the various
    annuli for each filter

    Parameters
    ----------
    label : string
        label associated with the galaxy, both for finding image/data files and
        saving the aperture image (e.g., 'ngc24_offset_')

    filter_list : list of strings
        filters for the galaxy

    center_ra, center_dec : float
        coordinates of the center of the galaxy (degrees)

    major_diam, minor_diam : float
        major and minor axes for the galaxy ellipse (arcsec)

    pos_angle : float
        position angle of the galaxy ellipse ("position angle increases
        counterclockwise from North (PA=0)")

    """
    counts_im = label + 'sk.fits'
    exp_im = label + 'ex.fits'

    # get the image HDUs
    hdu_list = []
    for filt in filter_list:
        with fits.open(label+filt+'_sk.fits') as hdu_counts, fits.open(label+filt+'_ex.fits') as hdu_ex:
            hdu_list.append(fits.ImageHDU(data=hdu_counts[1].data/hdu_ex[1].data,
                                                header=hdu_counts[1].header))

    # if there's more than one filter, do reprojection
    if len(filter_list) > 1:
        for f in range(1,len(filter_list)):
            new_array, _ = reproject_interp(hdu_list[f], hdu_list[0].header)
            hdu_list[f] = fits.ImageHDU(data=new_array, header=hdu_list[0].header)

    # normalize the images
    for f in range(len(filter_list)):

        # subtract mode
        # - do a sigma clip
        pix_clip = sigma_clip(hdu_list[f].data, sigma=2.5, iters=3)
        # - calculate biweight
        biweight_clip = biweight_location(pix_clip.data[~pix_clip.mask])
        # - subtraction
        new_array = hdu_list[f].data - biweight_clip

        # set anything below 0 to 0
        new_array[new_array < 0] = 0

        # set 95th percentile to 1
        new_array = new_array/np.nanpercentile(new_array, 95)

        # save it
        hdu_list[f].data = new_array


    # add the images together
    im_sum = np.mean([hdu_list[f].data for f in range(len(filter_list))], axis=0)

    # make it into an HDU
    hdu_sum = fits.ImageHDU(data=log_image(im_sum, 0, np.nanpercentile(im_sum, 99.5)),
                                header=hdu_list[0].header)

    # make an image
    fig = aplpy.FITSFigure(hdu_sum)
    fig.show_grayscale()
    fig.axis_labels.hide_x()
    fig.axis_labels.hide_y()
    fig.tick_labels.hide_x()
    fig.tick_labels.hide_y()
    fig.frame.set_linewidth(0)

    # aperture ellipses
    fig.show_ellipses(center_ra, center_dec,
                          major_diam/3600, minor_diam/3600,
                          angle=90+pos_angle,
                          edgecolor='red', linewidth=2)

    fig.save(label+'aperture_image.pdf')
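# Note: log_image is called above but not included in this excerpt. A minimal
# sketch, assuming it maps the image onto a logarithmic stretch between a
# lower and an upper cut; the name and exact behavior are assumptions.
import numpy as np

def log_image(image, vmin, vmax, scale=1000.0):
    # Clip to [vmin, vmax], normalize to [0, 1], then apply a log stretch.
    clipped = np.clip(image, vmin, vmax)
    norm = (clipped - vmin) / (vmax - vmin)
    return np.log10(1.0 + scale * norm) / np.log10(1.0 + scale)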
Exemple #39
0
    def get_features_object(self):
        '''
        Calculate features that characterize the RapidEye scene for later use,
        and populate the values into the database.
        '''
        array = self.get_raster().read_data_file_as_array()
        data_array = numpy.array(array)
        features_array = []
        for i in range(data_array.shape[0]):
            band = data_array[i, :, :].ravel()
            band = band[band != 0]
            features_array.append(numpy.nanpercentile(band, 10))
            features_array.append(numpy.nanpercentile(band, 25))
            features_array.append(numpy.nanpercentile(band, 50))
            features_array.append(numpy.nanpercentile(band, 75))
            features_array.append(numpy.nanpercentile(band, 90))
            features_array.append(numpy.mean(band))
            features_array.append(numpy.min(band) * 1.0)
            features_array.append(numpy.max(band) * 1.0)
        geotransform = self.get_raster().get_geotransform()

        features_array.append(geotransform[0])
        features_array.append(geotransform[3])
        features_array.append((self.get_aquisition_date() -
                               datetime.datetime(1970, 1, 1)).total_seconds())
        tile_id = self.get_sensor().get_attribute(TILE_ID)
        raster_path = self.file_dictionary[_IMAGE]
        features = RapideyeFeatures(band_1_quant_10=features_array[0],
                                    band_1_quant_25=features_array[1],
                                    band_1_quant_50=features_array[2],
                                    band_1_quant_75=features_array[3],
                                    band_1_quant_90=features_array[4],
                                    band_1_mean=features_array[5],
                                    band_1_min=features_array[6],
                                    band_1_max=features_array[7],
                                    band_2_quant_10=features_array[8],
                                    band_2_quant_25=features_array[9],
                                    band_2_quant_50=features_array[10],
                                    band_2_quant_75=features_array[11],
                                    band_2_quant_90=features_array[12],
                                    band_2_mean=features_array[13],
                                    band_2_min=features_array[14],
                                    band_2_max=features_array[15],
                                    band_3_quant_10=features_array[16],
                                    band_3_quant_25=features_array[17],
                                    band_3_quant_50=features_array[18],
                                    band_3_quant_75=features_array[19],
                                    band_3_quant_90=features_array[20],
                                    band_3_mean=features_array[21],
                                    band_3_min=features_array[22],
                                    band_3_max=features_array[23],
                                    band_4_quant_10=features_array[24],
                                    band_4_quant_25=features_array[25],
                                    band_4_quant_50=features_array[26],
                                    band_4_quant_75=features_array[27],
                                    band_4_quant_90=features_array[28],
                                    band_4_mean=features_array[29],
                                    band_4_min=features_array[30],
                                    band_4_max=features_array[31],
                                    band_5_quant_10=features_array[32],
                                    band_5_quant_25=features_array[33],
                                    band_5_quant_50=features_array[34],
                                    band_5_quant_75=features_array[35],
                                    band_5_quant_90=features_array[36],
                                    band_5_mean=features_array[37],
                                    band_5_min=features_array[38],
                                    band_5_max=features_array[39],
                                    top=features_array[40],
                                    left=features_array[41],
                                    time=features_array[42],
                                    footprint=tile_id,
                                    path=raster_path)
        return features
        par = (par - par_min) / (par_max - par_min)

        red, green, blue = cm.jet(par)[:, :3].T
        alpha = r2 > r2_thr

        red[~alpha] = np.nan
        green[~alpha] = np.nan
        blue[~alpha] = np.nan

        return cortex.VertexRGB(
            red=red, green=green, blue=blue, subject='fsaverage', alpha=alpha.astype(float) * 0.7)

    for sid in range(10):
        p = pars[sid, :, 0]

        par_min = np.nanpercentile(p, 10)
        par_max = np.nanpercentile(p, 90)
        extra_subjects[f'mu_s{sid}'] = get_thr_map(pars[sid, :, 0], r2s[sid],
                par_min=par_min, par_max=par_max)

    ds = cortex.Dataset(
        r2=r2s_mean_v,
        corr=corrs_mean_v,
        # r2_cv=r2s_cv_v,
        # r2_trialwise=r2s_trialwise_v,
        # corrs_cv_mean=corrs_cv_mean_v,
        mu_log=pars_mean_v_log,
        mu_log_thr=weighted_mu_rgb_v,
        weighted_sd=weighted_sd,
        weighted_amplitude=weighted_amplitude,
        **extra_subjects)
Exemple #41
0
def plot_mean_quantile_returns_spread_time_series(mean_returns_spread,
                                                  std_err=None,
                                                  bandwidth=1,
                                                  ax=None):
    """
    Plots mean period wise returns for factor quantiles.

    Parameters
    ----------
    mean_returns_spread : pd.Series
        Series with difference between quantile mean returns by period.
    std_err : pd.Series
        Series with standard error of difference between quantile
        mean returns each period.
    bandwidth : float
        Width of displayed error bands in standard deviations.
    ax : matplotlib.Axes, optional
        Axes upon which to plot.

    Returns
    -------
    ax : matplotlib.Axes
        The axes that were plotted on.
    """

    if isinstance(mean_returns_spread, pd.DataFrame):
        if ax is None:
            ax = [None for a in mean_returns_spread.columns]

        ymin, ymax = (None, None)
        for (i, a), (name, fr_column) in zip(enumerate(ax),
                                             mean_returns_spread.iteritems()):
            stdn = None if std_err is None else std_err[name]
            a = plot_mean_quantile_returns_spread_time_series(fr_column,
                                                              std_err=stdn,
                                                              ax=a)
            ax[i] = a
            curr_ymin, curr_ymax = a.get_ylim()
            ymin = curr_ymin if ymin is None else min(ymin, curr_ymin)
            ymax = curr_ymax if ymax is None else max(ymax, curr_ymax)

        for a in ax:
            a.set_ylim([ymin, ymax])

        return ax

    if mean_returns_spread.isnull().all():
        return ax

    periods = mean_returns_spread.name
    title = ('Top Minus Bottom Quantile Mean Return ({} Period Forward Return)'
             .format(periods if periods is not None else ""))

    if ax is None:
        f, ax = plt.subplots(figsize=(18, 6))

    mean_returns_spread_bps = mean_returns_spread * DECIMAL_TO_BPS

    mean_returns_spread_bps.plot(alpha=0.4, ax=ax, lw=0.7, color='forestgreen')
    mean_returns_spread_bps.rolling(window=22).mean().plot(color='orangered',
                                                           alpha=0.7,
                                                           ax=ax)
    ax.legend(['mean returns spread', '1 month moving avg'], loc='upper right')

    if std_err is not None:
        std_err_bps = std_err * DECIMAL_TO_BPS
        upper = mean_returns_spread_bps.values + (std_err_bps * bandwidth)
        lower = mean_returns_spread_bps.values - (std_err_bps * bandwidth)
        ax.fill_between(mean_returns_spread.index,
                        lower,
                        upper,
                        alpha=0.3,
                        color='steelblue')

    ylim = np.nanpercentile(abs(mean_returns_spread_bps.values), 95)
    ax.set(ylabel='Difference In Quantile Mean Return (bps)',
           xlabel='',
           title=title,
           ylim=(-ylim, ylim))
    ax.axhline(0.0, linestyle='-', color='black', lw=1, alpha=0.8)

    return ax
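# Note: DECIMAL_TO_BPS is a module-level constant not shown in this excerpt;
# it converts decimal returns to basis points, presumably as below.
DECIMAL_TO_BPS = 10000  # 1.0 (100%) -> 10,000 bps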
Exemple #42
0
def plot_quantile_returns_bar(mean_ret_by_q,
                              by_group=False,
                              ylim_percentiles=None,
                              ax=None):
    """
    Plots mean period wise returns for factor quantiles.

    Parameters
    ----------
    mean_ret_by_q : pd.DataFrame
        DataFrame with quantile, (group) and mean period wise return values.
    by_group : bool
        Disaggregated figures by group.
    ylim_percentiles : tuple of integers
        Percentiles of observed data to use as y limits for plot.
    ax : matplotlib.Axes, optional
        Axes upon which to plot.

    Returns
    -------
    ax : matplotlib.Axes
        The axes that were plotted on.
    """

    mean_ret_by_q = mean_ret_by_q.copy()

    if ylim_percentiles is not None:
        ymin = (np.nanpercentile(mean_ret_by_q.values, ylim_percentiles[0]) *
                DECIMAL_TO_BPS)
        ymax = (np.nanpercentile(mean_ret_by_q.values, ylim_percentiles[1]) *
                DECIMAL_TO_BPS)
    else:
        ymin = None
        ymax = None

    if by_group:
        num_group = len(mean_ret_by_q.index.get_level_values('group').unique())

        if ax is None:
            v_spaces = ((num_group - 1) // 2) + 1
            f, ax = plt.subplots(v_spaces,
                                 2,
                                 sharex=False,
                                 sharey=True,
                                 figsize=(18, 6 * v_spaces))
            ax = ax.flatten()

        for a, (sc, cor) in zip(ax, mean_ret_by_q.groupby(level='group')):
            (cor.xs(sc,
                    level='group').multiply(DECIMAL_TO_BPS).plot(kind='bar',
                                                                 title=sc,
                                                                 ax=a))

            a.set(xlabel='', ylabel='Mean Return (bps)', ylim=(ymin, ymax))

        if num_group < len(ax):
            ax[-1].set_visible(False)

        return ax

    else:
        if ax is None:
            f, ax = plt.subplots(1, 1, figsize=(18, 6))

        (mean_ret_by_q.multiply(DECIMAL_TO_BPS).plot(
            kind='bar',
            title="Mean Period Wise Return By Factor Quantile",
            ax=ax))
        ax.set(xlabel='', ylabel='Mean Return (bps)', ylim=(ymin, ymax))

        return ax
Exemple #43
0
def do_stats(time,statf,data,drr,hem,filename,sheetname,min_occ):

        year=time.year
        month=time.month

        mat=[]
        row=['']
        for stat in statf:
            if isinstance(stat,str):
                row.append(stat)
            elif isinstance(stat,list):
                for p in stat:
                    row.append('P'+str(p))
            else:
                row.append('Main direction')

        mat.append(row)



        # monthly stats
        for mo in range(1,13):   
            idx=month==mo
            if any(idx):
                row=[datetime.date(1900, mo, 1).strftime('%B')]
                for stat in statf:
                    if stat=='n':
                        tmp=data[idx]
                        row.append('%.2f'%len(tmp[~np.isnan(tmp)]))
                    elif isinstance(stat,str):
                        fct=getattr(np, 'nan'+stat)
                        row.append('%.2f'%fct(data[idx]))
                    elif isinstance(stat,list):
                        perc=list(np.nanpercentile(data[idx],stat))
                        row+=['%.2f'%x for x in perc]
                    else:
                        if not isinstance(drr,str):
                            #for min_occ in [15,10,5,1]:
                            occ=do_occurence(drr[idx].values,min_occ)
                            #    if len(occ)>0:
                            #        break
                            row.append(', '.join(occ))

                mat.append(row)



        # Do seasons
        if hem=='South hemisphere(Summer/Winter)':
            seas=[((month<=3) | (month>=10))] # Summer: October to March
            seas.append(((month>=4) & (month<=9))) # Winter: April to September
            sea_names=['Summer','Winter']

        elif hem=='South hemisphere 4 seasons':
            seas=[(month>=6) & (month <=8)]# winter
            seas.append((month>=9) & (month <=11))# spring
            seas.append((month>=12) | (month<=2))#summer
            seas.append((month>=3) & (month<=5))# autumn
            sea_names=['Winter','Spring','Summer','Autumn']
        elif hem =='North hemishere(Summer/Winter)':
            seas=[(month>=4) & (month<=9)]  # Winter: April to September
            seas.append((month<=3) | (month>=10)) # Summer: October to March
            sea_names=['Summer','Winter']
        elif hem=='North hemisphere moosoon(SW,NE,Hot season)':
            seas=[(month>=5) & (month<=10)] # SW: May to Oct
            seas.append((month<=2) | (month>=11)) # SE: Nov to Feb
            seas.append((month==3) | (month==4)) # Hot: March and April
            sea_names=['SW monsoon','NE monsoon','Hot season']
        elif hem=='North hemisphere 4 seasons':
            seas=[(month>=12) | (month<=2)] # winter
            seas.append((month>=3) & (month<=5)) # spring
            seas.append((month>=6) & (month <=8)) # summer
            seas.append((month>=9) & (month <=11)) # autumn
            sea_names=['Winter','Spring','Summer','Autumn']
        elif hem == 'Yearly':
            unique_year=np.unique(year)
            seas=[]
            sea_names=[]
            for y in unique_year:
                seas.append(year==y)
                sea_names.append('%i' % y)


        for i,idx in enumerate(seas):
            if any(idx):
                row=[sea_names[i]]
                for stat in statf:
                    if stat=='n':
                        tmp=data[idx]
                        row.append('%.2f'%len(tmp[~np.isnan(tmp)]))
                    elif isinstance(stat,str):
                        fct=getattr(np, 'nan'+stat)
                        row.append('%.2f'%fct(data[idx]))
                    elif isinstance(stat,list):
                        perc=list(np.nanpercentile(data[idx],stat))
                        row+=['%.2f'%x for x in perc]
                    else:
                        if not isinstance(drr,str):
                            #for min_occ in [15,10,5,1]:
                            occ=do_occurence(drr[idx].values,min_occ)
                            #    if len(occ)>0:
                            #        break
                            row.append(', '.join(occ))

                mat.append(row)

        # %% Do total
        row=['Total']
        for stat in statf:
            if stat=='n':
                row.append('%.2f'%len(data[~np.isnan(data)]))
            elif isinstance(stat,str):
                fct=getattr(np, 'nan'+stat)
                row.append('%.2f'%fct(data))
            elif isinstance(stat,list):
                perc=list(np.nanpercentile(data,stat))
                row+=['%.2f'%x for x in perc]
            else:
                if not isinstance(drr,str):
                    #for min_occ in [15,10,5,1]:
                    occ=do_occurence(drr.values,min_occ)
                    #    if len(occ)>0:
                    #        break
                    row.append(', '.join(occ))

        mat.append(row)
        create_table(filename,sheetname,np.array(mat))
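# A hedged illustration of the `statf` argument, derived from the branches
# above: 'n' counts valid samples, any other string is dispatched to the
# matching np.nan<stat> function, a list is treated as percentiles, and any
# remaining entry (e.g. None) triggers the main-direction occurrence column.
statf = ['n', 'mean', 'std', [1, 50, 99], None]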
Exemple #44
0
def calc_wd_age(teff,
                e_teff,
                logg,
                e_logg,
                n_mc=2000,
                model_wd='DA',
                feh='p0.00',
                vvcrit='0.0',
                model_ifmr='Cummings_2018',
                return_distributions=False):
    '''
    Calculates white dwarf ages with a frequentist approach. Starts from a normal
    distribution of teff and logg based on the errors and passes the full
    distribution through the same process to get a distribution of ages.
    '''

    if (not isinstance(teff, np.ndarray)):
        teff = np.array([teff])
        e_teff = np.array([e_teff])
        logg = np.array([logg])
        e_logg = np.array([e_logg])

    N = len(teff)

    teff_dist, logg_dist = [], []

    for i in range(N):
        if (np.isnan(teff[i] + e_teff[i] + logg[i] + e_logg[i])):
            teff_dist.append(np.nan)
            logg_dist.append(np.nan)
        else:
            teff_dist.append(np.random.normal(teff[i], e_teff[i], n_mc))
            logg_dist.append(np.random.normal(logg[i], e_logg[i], n_mc))
    teff_dist, logg_dist = np.array(teff_dist), np.array(logg_dist)

    cooling_age_dist, final_mass_dist = calc_cooling_age(teff_dist,
                                                         logg_dist,
                                                         n_mc,
                                                         N,
                                                         model=model_wd)

    initial_mass_dist = calc_initial_mass(model_ifmr, final_mass_dist, n_mc)

    ms_age_dist = calc_ms_age(initial_mass_dist, feh=feh, vvcrit=vvcrit)

    total_age_dist = cooling_age_dist + ms_age_dist

    mask = np.logical_or(
        np.logical_or(ms_age_dist / 1e9 > 13.8, total_age_dist / 1e9 > 13.8),
        cooling_age_dist / 1e9 > 13.8)

    cooling_age_dist[mask] = np.copy(cooling_age_dist[mask]) * np.nan
    final_mass_dist[mask] = np.copy(final_mass_dist[mask]) * np.nan
    initial_mass_dist[mask] = np.copy(initial_mass_dist[mask]) * np.nan
    ms_age_dist[mask] = np.copy(ms_age_dist[mask]) * np.nan
    total_age_dist[mask] = np.copy(total_age_dist[mask]) * np.nan

    results = Table()

    results['final_mass_median'] = np.array(
        [np.nanpercentile(x, 50) for x in final_mass_dist])
    results['final_mass_err_high'] = np.array([
        np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50)
        for x in final_mass_dist
    ])
    results['final_mass_err_low'] = np.array([
        np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655)
        for x in final_mass_dist
    ])

    results['initial_mass_median'] = np.array(
        [np.nanpercentile(x, 50) for x in initial_mass_dist])
    results['initial_mass_err_high'] = np.array([
        np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50)
        for x in initial_mass_dist
    ])
    results['initial_mass_err_low'] = np.array([
        np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655)
        for x in initial_mass_dist
    ])

    results['cooling_age_median'] = np.array(
        [np.nanpercentile(x, 50) for x in cooling_age_dist])
    results['cooling_age_err_high'] = np.array([
        np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50)
        for x in cooling_age_dist
    ])
    results['cooling_age_err_low'] = np.array([
        np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655)
        for x in cooling_age_dist
    ])

    results['ms_age_median'] = np.array(
        [np.nanpercentile(x, 50) for x in ms_age_dist])
    results['ms_age_err_high'] = np.array([
        np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50)
        for x in ms_age_dist
    ])
    results['ms_age_err_low'] = np.array([
        np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655)
        for x in ms_age_dist
    ])

    results['total_age_median'] = np.array(
        [np.nanpercentile(x, 50) for x in total_age_dist])
    results['total_age_err_high'] = np.array([
        np.nanpercentile(x, 84.1345) - np.nanpercentile(x, 50)
        for x in total_age_dist
    ])
    results['total_age_err_low'] = np.array([
        np.nanpercentile(x, 50) - np.nanpercentile(x, 15.8655)
        for x in total_age_dist
    ])

    if (return_distributions):
        results['final_mass_dist'] = final_mass_dist
        results['initial_mass_dist'] = initial_mass_dist
        results['cooling_age_dist'] = cooling_age_dist
        results['ms_age_dist'] = ms_age_dist
        results['total_age_dist'] = total_age_dist
    return results
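# A hedged usage sketch, assuming the helper functions referenced above
# (calc_cooling_age, calc_initial_mass, calc_ms_age) are available from the
# same module. The input values are illustrative only.
results = calc_wd_age(20000., 200., 8.0, 0.05, n_mc=2000,
                      model_wd='DA', model_ifmr='Cummings_2018')
print(results['total_age_median'][0],
      results['total_age_err_low'][0],
      results['total_age_err_high'][0])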
Exemple #45
0
def threemultis():
    # K2-198
    # ---------------------------------------------#
    print('K2-198')
    if not os.path.isfile('{}/results/K2-198.fits'.format(PACKAGEDIR)):
        tpfs = lk.search_targetpixelfile('K2-198').download_all()

        clcs = []  # Corrected Light Curves

        for idx, tpf in enumerate(tpfs):
            tpf = tpf[10:]
            tpf = tpf[np.in1d(
                tpf.time,
                tpf.to_lightcurve(aperture_mask='all').remove_nans().time)]
            tpf = tpf[tpf.to_lightcurve().normalize().flux > 0.8]
            aper = tpf.create_threshold_mask()
            tpf.plot(aperture_mask=aper)

            mask = utils.planet_mask(tpf.time, 'K2-198')
            clc = fit.PLD(tpf,
                          planet_mask=mask,
                          trim=1,
                          ndraws=1000,
                          logrho_mu=np.log10(150),
                          aperture=aper)
            pickle.dump(
                clc,
                open('{}/results/K2-198_{}.p'.format(PACKAGEDIR, idx), 'wb'))
            clcs.append(clc)

        clc = clcs[0].append(clcs[1])
        clc.to_fits('{}/results/K2-198.fits'.format(PACKAGEDIR),
                    overwrite=True)
        clc.to_csv('{}/results/K2-198.csv'.format(PACKAGEDIR))
    else:
        print('file exists')
        df = pd.read_csv('{}/results/K2-198.csv'.format(PACKAGEDIR))
        clc = lk.KeplerLightCurve(df.time, df.flux, df.flux_err)
    #_run(clc, 'K2-198')

    # K2-168
    # ---------------------------------------------#
    print('K2-168')

    if not os.path.isfile('{}/results/K2-168.fits'.format(PACKAGEDIR)):
        tpf = lk.search_targetpixelfile('K2-168').download()
        tpf = tpf[10:]
        tpf = tpf[np.in1d(
            tpf.time,
            tpf.to_lightcurve(aperture_mask='all').remove_nans().time)]
        tpf = tpf[tpf.to_lightcurve().normalize().flux > 0.8]

        mask = utils.planet_mask(tpf.time, 'K2-168')
        aper = np.nanmedian(tpf.flux, axis=0) > 30
        # First pass, remove some very bad outliers
        bad = np.zeros(len(tpf.time), bool)
        for count in range(2):
            pld_lc = tpf[~bad].to_corrector('pld').correct(
                aperture_mask=aper, cadence_mask=mask[~bad])
            pld_lc = pld_lc.flatten(31, mask=~mask[~bad])
            bad |= np.in1d(
                tpf.time, pld_lc.time[np.abs(pld_lc.flux - 1) > 5 *
                                      np.std(pld_lc.flux - 1)])

        tpf = tpf[~bad]
        mask = mask[~bad]
        clc = fit.PLD(tpf,
                      planet_mask=mask,
                      trim=0,
                      aperture=aper,
                      logrho_mu=np.log(1))
        clc.to_fits('{}/results/K2-168.fits'.format(PACKAGEDIR))
        clc.to_csv('{}/results/K2-168.csv'.format(PACKAGEDIR))
        pickle.dump(clc, open('{}/results/K2-168.p'.format(PACKAGEDIR), 'wb'))
    else:
        print('file exists')
        df = pd.read_csv('{}/results/K2-168.csv'.format(PACKAGEDIR))
        clc = lk.KeplerLightCurve(df.time, df.flux, df.flux_err)

    #_run(clc, 'K2-168')

    # K2-43
    # ---------------------------------------------#
    print('K2-43')

    if not os.path.isfile('{}/results/K2-43.fits'.format(PACKAGEDIR)):
        # Trim out some pixels which have a bleed column on them
        raw_tpf = lk.search_targetpixelfile('K2-43').download()
        hdu = deepcopy(raw_tpf.hdu)
        for name in hdu[1].columns.names:
            if (len(hdu[1].data[name].shape) == 3):
                hdu[1].data[name][:, :, :4] = np.nan
        fits.HDUList(hdus=list(hdu)).writeto('hack.fits', overwrite=True)
        tpf = lk.KeplerTargetPixelFile('hack.fits',
                                       quality_bitmask=raw_tpf.quality_bitmask)
        os.remove('hack.fits')

        tpf = tpf[10:]
        tpf = tpf[np.in1d(
            tpf.time,
            tpf.to_lightcurve(aperture_mask='all').remove_nans().time)]
        tpf = tpf[tpf.to_lightcurve().normalize().flux > 0.8]

        mask = utils.planet_mask(tpf.time, 'K2-43')
        aper = np.nan_to_num(np.nanpercentile(tpf.flux, 95, axis=(0))) > 50

        # First pass, remove some very bad outliers
        bad = np.zeros(len(tpf.time), bool)
        for count in range(2):
            pld_lc = tpf[~bad].to_corrector('pld').correct(
                aperture_mask=aper, cadence_mask=mask[~bad])
            pld_lc = pld_lc.flatten(31, mask=~mask[~bad])
            bad |= np.in1d(
                tpf.time, pld_lc.time[np.abs(pld_lc.flux - 1) > 5 *
                                      np.std(pld_lc.flux - 1)])

        tpf = tpf[~bad]
        mask = mask[~bad]
        clc = fit.PLD(tpf,
                      planet_mask=mask,
                      trim=1,
                      aperture=aper,
                      logrho_mu=np.log(30))
        clc.to_fits('{}/results/K2-43.fits'.format(PACKAGEDIR))
        clc.to_csv('{}/results/K2-43.csv'.format(PACKAGEDIR))
        pickle.dump(clc, open('{}/results/K2-43.p'.format(PACKAGEDIR), 'wb'))
    else:
        print('file exists')
        df = pd.read_csv('{}/results/K2-43.csv'.format(PACKAGEDIR))
        clc = lk.KeplerLightCurve(df.time, df.flux, df.flux_err)
Exemple #46
0
print("-------- Analyse par Cateorie-----------")
print(kinds)
#je veux la taille moyenne, mediane, perecentile 70, percentile 30
calcByCategory = {}
for category in categories:
    # print(dataByCategory[category])
    calc = {}
    data = np.array(dataByCategory[category], dtype=float)  # np.float is a removed alias; use the builtin float
    meanvalues = np.nanmean(data, axis=0)
    medianvalues = np.nanmedian(data, axis=0)
    sdvalues = np.nanstd(data, axis=0)
    varvalues = np.nanvar(data, axis=0)
    minvalues = np.nanmin(data, axis=0)
    maxvalues = np.nanmax(data, axis=0)
    percentile25values = np.nanpercentile(data, 25, axis=0)
    percentile75values = np.nanpercentile(data, 75, axis=0)
    lengthvalues = np.count_nonzero(~np.isnan(data), axis=0)
    for i in range(len(kinds)):
        calc["mean-" + kinds[i]] = meanvalues[i]
        calc["median-" + kinds[i]] = medianvalues[i]
        calc["sd-" + kinds[i]] = sdvalues[i]
        calc["var-" + kinds[i]] = varvalues[i]
        calc["min-" + kinds[i]] = minvalues[i]
        calc["max-" + kinds[i]] = maxvalues[i]
        calc["percentile25-" + kinds[i]] = percentile25values[i]
        calc["percentile75-" + kinds[i]] = percentile75values[i]
        calc["length-" + kinds[i]] = lengthvalues[i]
    calcByCategory[category] = calc

# print(calcByCategory)
def nanpercentile(arr, axis=0):
    return np.nanpercentile(arr, PERCENTILES, axis=axis)
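# Note: PERCENTILES is a module-level constant not shown in this excerpt. A
# minimal, self-contained illustration of the wrapper above (the percentile
# values themselves are an assumption):
import numpy as np

PERCENTILES = (5, 25, 50, 75, 95)

data = np.array([[1.0, np.nan, 3.0],
                 [4.0, 5.0, np.nan],
                 [7.0, 8.0, 9.0]])
print(np.nanpercentile(data, PERCENTILES, axis=0))  # one row per percentile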
Exemple #48
0
def cpg_heatmap(df,
                methylated_color: str = 'rgb(215,48,39)',
                unmethylated_color: str = 'rgb(33,102,172)',
                ambiguous_color: str = 'rgb(240,240,240)',
                lim_llr: float = 10,
                min_diff_llr: float = 1,
                fig_width: int = None,
                fig_height: int = None):
    """
    Plot the values per CpG as a heatmap
    """
    # Cannot compute the heatmap with fewer than 2 CpG columns
    if len(df.columns) <= 1:
        return None

    # Fill missing values by 0 = ambiguous methylation
    df = df.fillna(0)

    # Prepare subplot area
    fig = make_subplots(rows=1,
                        cols=2,
                        shared_yaxes=True,
                        column_widths=[0.95, 0.05],
                        specs=[[{
                            "type": "heatmap"
                        }, {
                            "type": "scatter"
                        }]])

    # Plot dendrogram
    dendrogram = ff.create_dendrogram(df.values,
                                      labels=df.index,
                                      orientation='left',
                                      color_threshold=0,
                                      colorscale=["grey"])
    for data in dendrogram.data:
        fig.add_trace(data, row=1, col=2)

    # Reorder rows
    labels_ordered = np.flip(dendrogram.layout['yaxis']['ticktext'])
    df = df.reindex(labels_ordered)

    # Define lim_llr if not given: symmetrical 2nd/98th percentile of the values
    if not lim_llr:
        lim_llr = max(np.absolute(np.nanpercentile(df.values, [2, 98])))

    # Define colorscale
    offset = min_diff_llr / lim_llr * 0.5
    colorscale = [[0.0, unmethylated_color],
                               [0.5 - offset, ambiguous_color],
                               [0.5 + offset, ambiguous_color],
                               [1.0, methylated_color]]

    # plot heatmap
    heatmap = go.Heatmap(name="heatmap",
                         x=df.columns,
                         y=df.index,
                         z=df.values,
                         zmin=-lim_llr,
                         zmax=lim_llr,
                         zmid=0,
                         colorscale=colorscale,
                         colorbar_title="Median LLR")
    fig.add_trace(heatmap, row=1, col=1)

    # Tweak figure layout
    fig.update_layout(dict1={
        'showlegend': False,
        'hovermode': 'closest',
        "plot_bgcolor": 'rgba(0,0,0,0)',
        "width": fig_width,
        "height": fig_height,
        "margin": {
            "t": 50,
            "b": 50
        }
    },
                      xaxis2={
                          "fixedrange": True,
                          'showgrid': False,
                          'showline': False,
                          "showticklabels": False,
                          'zeroline': False,
                          'ticks': ""
                      },
                      yaxis2={
                          "fixedrange": True,
                          'showgrid': False,
                          'showline': False,
                          "showticklabels": False,
                          'zeroline': False,
                          'ticks': "",
                          "automargin": True
                      },
                      xaxis={
                          "fixedrange": False,
                          "domain": [0, 0.95],
                          "showticklabels": False,
                          "title": "CpG positions"
                      },
                      yaxis={
                          "fixedrange": True,
                          "domain": [0, 1],
                          "ticks": "outside",
                          "automargin": True
                      })

    return fig
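# A hedged usage sketch: a small DataFrame of per-read log-likelihood ratios
# (rows = reads, columns = CpG positions) with synthetic values; assumes the
# plotly imports used by cpg_heatmap (make_subplots, ff, go) are available.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(0, 5, size=(6, 20)),
                  index=["read_{}".format(i) for i in range(6)],
                  columns=np.arange(1000, 1020))
fig = cpg_heatmap(df, lim_llr=10, min_diff_llr=1)
# fig.write_html("cpg_heatmap.html")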
Exemple #49
0
def plot_dstats(Gvel, Utemp, Vtemp, InputV, ddiv, dcrl, dshr, spath, ts):
    """
    Diagnostic figures for the drift statistics - projected properly
    """
    f = plt.figure(figsize=[9, 3])
    # dtarg = dt.datetime(2015,1,1)
    # t_p1 = B1.condy.get_index(dtarg)
    # t_p2 = B2.condy.get_index(dtarg)
    # t_p3 = B3.condy.get_index(dtarg)
    vlim = 0.4
    a_no = 30
    #     a_sc = 3.9e-1
    #     a_sc = 2*np.nanmax(np.hypot(Utemp,Vtemp))
    a_sc = 2 * np.nanpercentile(np.hypot(Utemp, Vtemp), [90])[0]
    rm = int(Gvel.m / a_no)
    rn = int(Gvel.n / a_no)
    ra = np.sqrt(rm + rn)
    ra = ra * a_sc
    m = Gvel.mplot

    plt.subplot(1, 3, 1)
    m.pcolormesh(Gvel.xptp, Gvel.yptp, ddiv, cmap='RdBu', rasterized=True)
    m.colorbar(location='bottom')
    p_rng = np.nanpercentile(ddiv, [40, 60])
    pr = np.max(np.abs(p_rng))
    p_rng = [-pr, pr]
    #     plt.clim(p_rng)
    #     plt.clim([0.0,1.0])
    m.drawcoastlines()
    ur, vr = Gvel.rotate_vectors_to_plot(Utemp, Vtemp)
    m.quiver(Gvel.xpts[::rm, ::rn],
             Gvel.ypts[::rm, ::rn],
             ur[::rm, ::rn],
             vr[::rm, ::rn],
             scale=ra,
             width=0.005)
    plt.ylabel(InputV.name)
    plt.title('Drift div. ' + ts.strftime('%Y%m%d'))

    plt.subplot(1, 3, 2)
    m.pcolormesh(Gvel.xptp, Gvel.yptp, dcrl, cmap='RdBu', rasterized=True)
    m.colorbar(location='bottom')
    p_rng = np.nanpercentile(dcrl, [8, 92])
    pr = np.max(np.abs(p_rng))
    p_rng = [-pr, pr]
    plt.clim(p_rng)
    #     plt.clim([0.0,5.0])
    m.drawcoastlines()
    m.quiver(Gvel.xpts[::rm, ::rn],
             Gvel.ypts[::rm, ::rn],
             ur[::rm, ::rn],
             vr[::rm, ::rn],
             scale=ra,
             width=0.005)
    plt.ylabel(InputV.name)
    plt.title('Drift curl ' + ts.strftime('%Y%m%d'))

    plt.subplot(1, 3, 3)
    rm = int(Gvel.m / a_no)
    rn = int(Gvel.n / a_no)
    ra = np.sqrt(rm + rn)
    ra = ra * a_sc

    m.pcolormesh(Gvel.xptp, Gvel.yptp, dshr, cmap='YlGnBu', rasterized=True)
    m.colorbar(location='bottom')
    p_rng = np.nanpercentile(dshr, [0, 87])
    plt.clim(p_rng)
    #     plt.clim([0.0,0.3])
    m.drawcoastlines()
    m.quiver(Gvel.xpts[::rm, ::rn],
             Gvel.ypts[::rm, ::rn],
             ur[::rm, ::rn],
             vr[::rm, ::rn],
             scale=ra,
             width=0.005)
    plt.ylabel(InputV.name)
    plt.title('Drift shear ' + ts.strftime('%Y%m%d'))
    f.savefig(spath + 'Drift_statistics_' + ts.strftime('%Y%m%d') + '.pdf',
              bbox_inches='tight')
    print('Saving figure: ' + spath + 'Drift_statistics_' +
          ts.strftime('%Y%m%d') + '.pdf')
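A note on the colour scaling used above: the curl panel clips its colour range to the 8th/92nd percentiles and then mirrors it around zero, so that a diverging colormap such as 'RdBu' stays centred. A minimal, standalone sketch of that pattern (plain NumPy, not part of the plotting module; the field name is made up):

import numpy as np

def symmetric_clim(field, lo=8, hi=92):
    """Return a zero-centred (vmin, vmax) pair from robust percentiles, ignoring NaNs."""
    p_lo, p_hi = np.nanpercentile(field, [lo, hi])
    pr = max(abs(p_lo), abs(p_hi))
    return -pr, pr

# example: a noisy divergence-like field with NaN gaps
rng = np.random.default_rng(0)
ddiv = rng.normal(0.0, 0.1, (50, 50))
ddiv[rng.random(ddiv.shape) < 0.05] = np.nan
vmin, vmax = symmetric_clim(ddiv)   # suitable for plt.clim((vmin, vmax))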
Exemple #50
0
def main():
    pass # For compatibility between running under Spyder and the CLI

#%%
    pl.ion()

    fname = [u'demo_behavior.h5']
    if fname[0] in ['demo_behavior.h5']:
        # TODO: todocument
        fname = [download_demo(fname[0])]
    # TODO: todocument
    m = cm.load(fname[0], is_behavior=True)

#%% load, rotate and eliminate useless pixels
    m = m.transpose([0, 2, 1])
    m = m[:, 150:, :]
#%% visualize movie
    m.play()
#%% select interesting portion of the FOV (draw a polygon on the figure that pops up, when done press enter)
    # TODO: Put the message below into the image
    print("Please draw a polygon delimiting the ROI on the image that will be displayed after the image; press enter when done")
    mask = np.array(behavior.select_roi(np.median(m[::100], 0), 1)[0], np.float32)
#%%
    n_components = 4  # number of movements to look for
    resize_fact = 0.5  # for computational efficiency, movies are downsampled
    # number of standard deviations above the mean magnitude required to measure the angle in polar coordinates
    num_std_mag_for_angle = .6
    only_magnitude = False  # if only interested in factorizing over the magnitude
    method_factorization = 'dict_learn'  # could also use 'nmf'
    # number of iterations for the dictionary learning algorithm (Mairal et al., 2010)
    max_iter_DL = -30

    spatial_filter_, time_trace_, of_or = cm.behavior.behavior.extract_motor_components_OF(m, n_components, mask=mask,
                                                                                           resize_fact=resize_fact, only_magnitude=only_magnitude, verbose=True, method_factorization='dict_learn', max_iter_DL=max_iter_DL)

#%%
    mags, dircts, dircts_thresh, spatial_masks_thrs = cm.behavior.behavior.extract_magnitude_and_angle_from_OF(
        spatial_filter_, time_trace_, of_or, num_std_mag_for_angle=num_std_mag_for_angle, sav_filter_size=3, only_magnitude=only_magnitude)
#%%
    idd = 0
    axlin = pl.subplot(n_components, 2, 2)
    for mag, dirct, spatial_filter in zip(mags, dircts_thresh, spatial_filter_):
        pl.subplot(n_components, 2, 1 + idd * 2)
        min_x, min_y = np.min(np.where(mask), 1)

        spfl = spatial_filter
        spfl = cm.movie(spfl[None, :, :]).resize(
            1 / resize_fact, 1 / resize_fact, 1).squeeze()
        max_x, max_y = np.add((min_x, min_y), np.shape(spfl))

        mask[min_x:max_x, min_y:max_y] = spfl
        mask[mask < np.nanpercentile(spfl, 70)] = np.nan
        pl.imshow(m[0], cmap='gray')
        pl.imshow(mask, alpha=.5)
        pl.axis('off')

        axelin = pl.subplot(n_components, 2, 2 + idd * 2, sharex=axlin)
        pl.plot(mag / 10, 'k')
        dirct[mag < 0.5 * np.std(mag)] = np.nan
        pl.plot(dirct, 'r-', linewidth=2)

        idd += 1
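The overlay above keeps only the strongest part of each spatial filter by NaN-masking everything below its 70th percentile before plotting it semi-transparently over the movie frame. A small self-contained sketch of that masking step (illustrative only; the array is synthetic):

import numpy as np

spfl = np.random.default_rng(1).random((60, 80))          # stand-in spatial filter
overlay = spfl.copy()
overlay[overlay < np.nanpercentile(spfl, 70)] = np.nan     # hide the weak 70 % of pixels
# imshow(overlay, alpha=.5) would then show only the strongest 30 % of pixels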
Exemple #51
0
    second_lower_percentile_dissip_med = [None] * number_of_profiles
    second_upper_percentile_dissip_med = [None] * number_of_profiles
    """

    #compute statistical properties of the saved values
    for index in range(total_number_of_valid_profiles):
        number_of_zero_flux += np.sum(np.abs(BB_flux_list[index]) == 0)
        amount_of_missing_values += np.sum(np.isnan(BB_flux_list[index]))
        #count the number of flux data points

        mean_Osborn_flux[index] = np.nanmean(Osborn_flux_list[index])
        mean_Shih_flux[index] = np.nanmean(Shih_flux_list[index])
        mean_BB_flux[index] = np.nanmean(BB_flux_list[index])
        median_flux[index] = np.nanmedian(BB_flux_list[index])

        upper_percentile_flux[index] = np.nanpercentile(
            BB_flux_list[index], flux_percentile)
        lower_percentile_flux[index] = np.nanpercentile(
            BB_flux_list[index], 100 - flux_percentile)
        second_upper_percentile_flux[index] = np.nanpercentile(
            BB_flux_list[index], second_flux_percentile)
        second_lower_percentile_flux[index] = np.nanpercentile(
            BB_flux_list[index], 100 - second_flux_percentile)
        """        
        mean_min_flux[index] = np.nanmean(oxygen_flux_statistic[index][:,0],axis=0)
        median_min_flux[index] = np.nanmedian(oxygen_flux_statistic[index][:,0],axis=0)

        upper_percentile_min_flux[index] = np.nanpercentile(oxygen_flux_statistic[index][:,0], flux_percentile)
        lower_percentile_min_flux[index] = np.nanpercentile(oxygen_flux_statistic[index][:,0], 100-flux_percentile)
        """
        #bathymetrie_mean[index] = np.nanmean(bathymetrie_statistic[index])
def get_rid_outlier(np_array, lower_percentile, upper_percentile):
    """Clip (winsorise) an array in place to the given percentile bounds."""
    lower_bound = np.nanpercentile(np_array, lower_percentile)
    upper_bound = np.nanpercentile(np_array, upper_percentile)
    np_array[np_array < lower_bound] = lower_bound
    np_array[np_array > upper_bound] = upper_bound
    return np_array
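Because the assignments are in place, the caller's array is modified as well; pass a copy if the raw values must be preserved. A hedged usage sketch (get_rid_outlier is the function defined just above; the data are made up):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 100.0, np.nan])
clipped = get_rid_outlier(x.copy(), 5, 95)     # NaNs are left untouched

# an equivalent, non-mutating alternative using np.clip:
lo, hi = np.nanpercentile(x, [5, 95])
clipped_alt = np.clip(x, lo, hi)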
Exemple #53
0
        hax = ax.imshow(cutamp, cm.Greys_r)
    else:
        cutphi = as_strided(phi[ibeg:iend, jbeg:jend]) * rad2mm

if arguments["--wrap"] is not None:
    cutphi = np.mod(cutphi + float(arguments["--wrap"]), 2 *
                    float(arguments["--wrap"])) - float(arguments["--wrap"])
    vmax = float(arguments["--wrap"])
    vmin = -vmax
elif (arguments["--vmax"] is not None) or (arguments["--vmin"] is not None):
    if arguments["--vmax"] is not None:
        vmax = np.float(arguments["--vmax"])
    if arguments["--vmin"] is not None:
        vmin = np.float(arguments["--vmin"])
else:
    vmax = np.nanpercentile(cutphi, 98)
    vmin = np.nanpercentile(cutphi, 2)

cax = ax.imshow(cutphi,
                cmap,
                interpolation='nearest',
                vmax=vmax,
                vmin=vmin,
                alpha=0.6)

divider = make_axes_locatable(ax)
c = divider.append_axes("right", size="5%", pad=0.05)
plt.colorbar(cax, cax=c)
if arguments["--title"] == None:
    fig.canvas.set_window_title(infile)
else:
Exemple #54
0
                    latr = np.deg2rad(rlat)
                    weights = np.cos(latr)

                for st in range(len(ttt_rain_dates)):
                    if weightlats:
                        zonmean_ttt = np.ma.mean(masked_rain[st, :, :], axis=1)
                        regmean_ttt = np.ma.average(zonmean_ttt,
                                                    weights=weights)
                        reg_ttt_mean[st] = regmean_ttt
                    else:
                        reg_ttt_mean[st] = np.ma.mean(masked_rain[st, :, :])

                # Getting a long term sum or mean
                tottttrain = np.nansum(reg_ttt_mean)
                rainperttt = np.nanmean(reg_ttt_mean)
                per75rain = np.nanpercentile(reg_ttt_mean, 75)

                if raintype == 'totrain':
                    yvals[cnt] = tottttrain
                elif raintype == 'rainperttt':
                    yvals[cnt] = rainperttt
                elif raintype == 'perc75':
                    yvals[cnt] = per75rain

            ### Put name into string list
            if dset == 'noaa':
                if aspect == 'rain':
                    modnm[cnt] = name + '/' + rainname
                else:
                    modnm[cnt] = name
            else:
        text_labels = [
            'Seiners', 'Trawlers and dredgers', 'Fixed gear',
            'Drifting longlines', 'Squid jiggers',
            'Pole and line, and trollers', 'Unclassified', 'All'
        ]

        for i, varname in enumerate([
                'seiners', 'trawlers_and_dredgers', 'fixed_gear',
                'drifting_longlines', 'squid_jigger',
                'pole_and_line_and_trollers', 'fishing', 'all'
        ]):

            axis = ax.flatten()[i]
            grid_data = np.copy(assumption_A[i])
            grid_data[grid_data == 0] = np.nan
            p05 = np.nanpercentile(grid_data, 5)
            p95 = np.nanpercentile(grid_data, 95)
            heatmap = axis.pcolormesh(lon_bnd,
                                      lat_bnd,
                                      grid_data / p95,
                                      cmap=cmr.chroma_r,
                                      norm=colors.LogNorm(vmin=1e-3, vmax=1))
            axis.add_feature(land_10m)
            axis.text(98,
                      26,
                      text_labels[i],
                      c='w',
                      horizontalalignment='right')

            axis.axis('off')
Exemple #56
0
def dependence_plot(ind,
                    shap_values,
                    features,
                    feature_names=None,
                    display_features=None,
                    interaction_index="auto",
                    color="#ff0052",
                    axis_color="#333333",
                    dot_size=16,
                    alpha=1,
                    title=None,
                    show=True):
    """
    Create a SHAP dependence plot, colored by an interaction feature.

    Parameters
    ----------
    ind : int
        Index of the feature to plot.

    shap_values : numpy.array
        Matrix of SHAP values (# samples x # features)

    features : numpy.array or pandas.DataFrame
        Matrix of feature values (# samples x # features)

    feature_names : list
        Names of the features (length # features)

    display_features : numpy.array or pandas.DataFrame
        Matrix of feature values for visual display (such as strings instead of coded values)

    interaction_index : "auto" or int
        The index of the feature used to color the plot.
    """

    # convert from DataFrames if we got any
    if str(type(features)) == "<class 'pandas.core.frame.DataFrame'>":
        if feature_names is None:
            feature_names = features.columns
        features = features.values  # DataFrame.as_matrix() was removed from pandas; .values is equivalent here
    if str(type(display_features)) == "<class 'pandas.core.frame.DataFrame'>":
        if feature_names is None:
            feature_names = display_features.columns
        display_features = display_features.values
    elif display_features is None:
        display_features = features

    # allow vectors to be passed
    if len(shap_values.shape) == 1:
        shap_values = np.reshape(shap_values, (len(shap_values), 1))
    if len(features.shape) == 1:
        features = np.reshape(features, (len(features), 1))

    # get both the raw and display feature values
    xv = features[:, ind]
    xd = display_features[:, ind]
    s = shap_values[:, ind]
    if type(xd[0]) == str:
        name_map = {}
        for i in range(len(xv)):
            name_map[xd[i]] = xv[i]
        xnames = list(name_map.keys())

    # allow a single feature name to be passed alone
    if type(feature_names) == str:
        feature_names = [feature_names]
    name = feature_names[ind]

    # guess which other feature has the strongest interaction with the plotted feature
    if interaction_index == "auto":
        interaction_index = approx_interactions(ind, shap_values, features)[0]

    # get both the raw and display color values
    cv = features[:, interaction_index]
    cd = display_features[:, interaction_index]
    if type(cd[0]) == str:
        cname_map = {}
        for i in range(len(cv)):
            cname_map[cd[i]] = cv[i]
        cnames = list(cname_map.keys())
    clow = np.nanpercentile(features[:, interaction_index], 5)
    chigh = np.nanpercentile(features[:, interaction_index], 95)

    # the actual scatter plot, TODO: adapt the dot_size to the number of data points
    pl.scatter(xv,
               s,
               s=dot_size,
               linewidth=0,
               c=features[:, interaction_index],
               cmap=red_blue,
               alpha=alpha,
               vmin=clow,
               vmax=chigh)

    # draw the color bar
    if type(cd[0]) == str:
        cb = pl.colorbar(ticks=[cname_map[n] for n in cnames])
        cb.set_ticklabels(cnames)
    else:
        cb = pl.colorbar()
    cb.set_label(feature_names[interaction_index], size=13)
    cb.ax.tick_params(labelsize=11)
    cb.set_alpha(1)
    cb.draw_all()

    # make the plot more readable
    pl.gcf().set_size_inches(7.5, 5)
    pl.xlabel(name, color=axis_color, fontsize=13)
    pl.ylabel("SHAP value for " + name, color=axis_color, fontsize=13)
    if title is not None:
        pl.title(title, color=axis_color, fontsize=13)
    pl.gca().xaxis.set_ticks_position('bottom')
    pl.gca().yaxis.set_ticks_position('left')
    pl.gca().spines['right'].set_visible(False)
    pl.gca().spines['top'].set_visible(False)
    pl.gca().tick_params(color=axis_color, labelcolor=axis_color, labelsize=11)
    for spine in pl.gca().spines.values():
        spine.set_edgecolor(axis_color)
    if type(xd[0]) == str:
        pl.xticks([name_map[n] for n in xnames],
                  xnames,
                  rotation='vertical',
                  fontsize=11)
    if show:
        pl.show()
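The 5th/95th-percentile clipping of the colour feature above is a generic way to keep a handful of extreme values from washing out the colour map. A standalone matplotlib sketch of the same idea (synthetic data and default colormap, not the SHAP plotting code itself):

import numpy as np
import matplotlib.pyplot as pl

rng = np.random.default_rng(2)
xv = rng.normal(size=500)                              # feature on the x axis
sv = 0.5 * xv + rng.normal(scale=0.2, size=500)        # stand-in for SHAP values
cv = rng.lognormal(size=500)                           # colour feature with a long tail

clow, chigh = np.nanpercentile(cv, [5, 95])            # robust colour limits
pl.scatter(xv, sv, c=cv, s=16, linewidth=0, vmin=clow, vmax=chigh)
pl.colorbar(label="colour feature (clipped to 5th-95th percentile)")
pl.show()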
Exemple #57
0
thetav_dd_stat       = {}
thetav_p_dd_stat     = {}
thetal_dd_stat       = {}
thetal_p_dd_stat     = {}
theta_dd_stat        = {}
theta_p_dd_stat      = {}
ww_dd_stat           = {}
ww_p_dd_stat         = {}
qv_dd_stat           = {}
ql_dd_stat           = {}
qt_dd_stat           = {}
qt_p_dd_stat         = {}

###################################### Lets's Begin ##############################################
####################################### Get Data #################################################
percenval_ud = np.array([[np.nanpercentile(WW[it,iz,WW[it,iz,:,:] <= dd_thres], percent) for iz in range(WW.shape[1])] for it in range(WW.shape[0])])
percenval_dd = np.array([[np.nanpercentile(WW[it,iz,WW[it,iz,:,:] <= dd_thres], percent) for iz in range(WW.shape[1])] for it in range(WW.shape[0])])

# Initialise lists for the updraught stats
# variables
thetav_ud_stat        = []
thetav_p_ud_stat      = []
thetal_ud_stat        = []
thetal_p_ud_stat      = []
theta_ud_stat         = []
theta_p_ud_stat       = []
ww_ud_stat            = []
ww_p_ud_stat          = []
qv_ud_stat            = []
mcl_ud_stat           = []
qcld_ud_stat          = []
Exemple #58
0
    def cutoff(self, recorded) -> Optional[Union[int, float, complex, np.ndarray]]:
        if not recorded:
            return None
        return np.nanpercentile(list(recorded.values()), (1 - 1 / self.rf) * 100)
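The method returns the (1 - 1/rf)·100 percentile of the recorded values, i.e. a threshold above which roughly the top 1/rf fraction of entries lie. A minimal standalone illustration with an assumed rf attribute (a toy class, not the original one):

import numpy as np

class _Recorder:
    def __init__(self, rf):
        self.rf = rf

    def cutoff(self, recorded):
        if not recorded:
            return None
        return np.nanpercentile(list(recorded.values()), (1 - 1 / self.rf) * 100)

rec = _Recorder(rf=4)                         # threshold at the 75th percentile
scores = {k: float(k) for k in range(100)}
print(rec.cutoff(scores))                     # 74.25 for the values 0..99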
Exemple #59
0
def calc_percentiles(ln_ms_age,
                     ln_cooling_age,
                     ln_total_age,
                     initial_mass,
                     final_mass,
                     high_perc,
                     low_perc,
                     datatype='log'):

    if (datatype == 'log'):
        ms_age_median = np.nanpercentile(ln_ms_age, 50)
        ms_age_err_low = ms_age_median - np.nanpercentile(ln_ms_age, low_perc)
        ms_age_err_high = np.nanpercentile(ln_ms_age,
                                           high_perc) - ms_age_median

        cooling_age_median = np.nanpercentile(ln_cooling_age, 50)
        cooling_age_err_low = cooling_age_median - np.nanpercentile(
            ln_cooling_age, low_perc)
        cooling_age_err_high = np.nanpercentile(ln_cooling_age,
                                                high_perc) - cooling_age_median

        total_age_median = np.nanpercentile(ln_total_age, 50)
        total_age_err_low = total_age_median - np.nanpercentile(
            ln_total_age, low_perc)
        total_age_err_high = np.nanpercentile(ln_total_age,
                                              high_perc) - total_age_median

        initial_mass_median = np.nanpercentile(initial_mass, 50)
        initial_mass_err_low = initial_mass_median - np.nanpercentile(
            initial_mass, low_perc)
        initial_mass_err_high = np.nanpercentile(
            initial_mass, high_perc) - initial_mass_median

        final_mass_median = np.nanpercentile(final_mass, 50)
        final_mass_low = final_mass_median - np.nanpercentile(
            final_mass, low_perc)
        final_mass_high = np.nanpercentile(final_mass,
                                           high_perc) - final_mass_median

    if (datatype == 'Gyr'):
        ms_age_median = np.nanpercentile((10**ln_ms_age) / 1e9, 50)
        ms_age_err_low = ms_age_median - np.nanpercentile(
            (10**ln_ms_age) / 1e9, low_perc)
        ms_age_err_high = np.nanpercentile(
            (10**ln_ms_age) / 1e9, high_perc) - ms_age_median

        cooling_age_median = np.nanpercentile((10**ln_cooling_age) / 1e9, 50)
        cooling_age_err_low = cooling_age_median - np.nanpercentile(
            (10**ln_cooling_age) / 1e9, low_perc)
        cooling_age_err_high = np.nanpercentile(
            (10**ln_cooling_age) / 1e9, high_perc) - cooling_age_median

        total_age_median = np.nanpercentile((10**ln_total_age) / 1e9, 50)
        total_age_err_low = total_age_median - np.nanpercentile(
            (10**ln_total_age) / 1e9, low_perc)
        total_age_err_high = np.nanpercentile(
            (10**ln_total_age) / 1e9, high_perc) - total_age_median

        initial_mass_median = np.nanpercentile(initial_mass, 50)
        initial_mass_err_low = initial_mass_median - np.nanpercentile(
            initial_mass, low_perc)
        initial_mass_err_high = np.nanpercentile(
            initial_mass, high_perc) - initial_mass_median

        final_mass_median = np.nanpercentile(final_mass, 50)
        final_mass_low = final_mass_median - np.nanpercentile(
            final_mass, low_perc)
        final_mass_high = np.nanpercentile(final_mass,
                                           high_perc) - final_mass_median

    return [
        ms_age_median, ms_age_err_low, ms_age_err_high, cooling_age_median,
        cooling_age_err_low, cooling_age_err_high, total_age_median,
        total_age_err_low, total_age_err_high, initial_mass_median,
        initial_mass_err_low, initial_mass_err_high, final_mass_median,
        final_mass_low, final_mass_high
    ]
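Every entry returned above follows the same median with asymmetric percentile errors pattern. A small helper makes that structure explicit; this is a hypothetical sketch assuming plain NumPy array inputs, not part of the original module:

import numpy as np

def median_with_errors(samples, low_perc=16, high_perc=84):
    """Return (median, err_low, err_high) using NaN-aware percentiles."""
    med = np.nanpercentile(samples, 50)
    err_low = med - np.nanpercentile(samples, low_perc)
    err_high = np.nanpercentile(samples, high_perc) - med
    return med, err_low, err_high

ln_ms_age = np.random.default_rng(3).normal(22.0, 0.3, 5000)   # synthetic posterior samples
print(median_with_errors(ln_ms_age))                           # roughly (22.0, 0.3, 0.3)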
Exemple #60
0
def acolite_map(inputfile=None, output=None, parameters=None, 
                dpi=300, ext='png', mapped=True, max_dim = 1000, limit=None,
                auto_range=False, range_percentiles=(5,95), dataset_rescale=False,
                map_title=True, 
                map_colorbar=False, map_colorbar_orientation='vertical',#'horizontal', 
                rgb_rhot = False, rgb_rhos = False, 
                red_wl = 660, green_wl = 560, blue_wl = 480, rgb_min = [0.0]*3, rgb_max = [0.15]*3, rgb_pan_sharpen = False, map_parameters_pan=True,
                map_fillcolor='White',
                map_scalepos = 'LR', map_scalebar = True, map_scalecolor='Black', map_scalecolor_rgb='White', map_scalelen=None, map_projection='tmerc',
                map_colorbar_edge=True, map_points=None, return_image=False, map_raster=False):

    import os, copy
    import datetime, time, dateutil.parser

    from acolite.shared import datascl,nc_data,nc_datasets,nc_gatts,qmap,closest_idx
    from acolite.acolite import pscale
    import acolite as ac

    from numpy import nanpercentile, log10, isnan, dstack
    from scipy.ndimage import zoom

    import matplotlib

    if not os.path.exists(inputfile):
        print('File {} not found.'.format(inputfile))
        return(False)

    ## run through maps
    maps = {'rhot': rgb_rhot, 'rhos': rgb_rhos, 'parameters': parameters is not None}
    if not any(maps.values()): return()
        
    ## get parameter scaling
    psc = pscale()

    ## read netcdf info    
    l2w_datasets = nc_datasets(inputfile)
    print(l2w_datasets)

    gatts = nc_gatts(inputfile)
    if 'MISSION_INDEX' in gatts:
        sat, sen = gatts['MISSION'], gatts['MISSION_INDEX']
        stime = dateutil.parser.parse(gatts['IMAGING_DATE']+' '+gatts['IMAGING_TIME']) 
        obase = '{}_{}_{}'.format(sat, sen, stime.strftime('%Y_%m_%d_%H_%M_%S'))
    else:
        sp = gatts['sensor'].split('_') if 'sensor' in gatts else gatts['SATELLITE_SENSOR'].split('_')  
        sat, sen = sp[0], sp[1]
        stime = dateutil.parser.parse(gatts['isodate'] if 'isodate' in gatts else gatts['ISODATE']) 
        obase = gatts['output_name'] if 'output_name' in gatts else gatts['obase']

    ## find pan sharpening dataset
    if rgb_pan_sharpen:
        if sat not in ['L7','L8']: rgb_pan_sharpen = False
        tmp = os.path.splitext(inputfile)
        l1_pan_ncdf = '{}L1R_pan{}'.format(tmp[0][0:-3],tmp[1])
        if os.path.exists(l1_pan_ncdf):
            pan_data = nc_data(l1_pan_ncdf, 'rhot_pan')
        else:
            print('L1 pan NetCDF file not found')
            rgb_pan_sharpen=False

    if output is not None:
        odir = output
    else:
        odir = gatts['output_dir']
        
    if not os.path.exists(odir): os.makedirs(odir)

    scf= 1.
    rescale = 1.0

    #if dataset_rescale or mapped:
    lon = nc_data(inputfile, 'lon')
    if mapped: 
        lat = nc_data(inputfile, 'lat')
        if rgb_pan_sharpen:
            lon_pan = zoom(lon, zoom=2, order=1)
            lat_pan = zoom(lat, zoom=2, order=1)

    ## set up mapping info
    if True:
        from numpy import linspace, tile, ceil, isnan, nan
        mask_val = -9999.9999
        from scipy.ndimage.interpolation import map_coordinates

        ## rescale to save memory
        dims = lon.shape
        dsc = (dims[0]/max_dim, dims[1]/max_dim)
        scf/=max(dsc)

        if rgb_pan_sharpen: scf = 1.0

        if (scf < 1.) and dataset_rescale:
            sc_dims = (int(ceil(dims[0] * scf)), int(ceil(dims[1] * scf)))
            xdim =  linspace(0,dims[1],sc_dims[1]).reshape(1,sc_dims[1])
            ydim =  linspace(0,dims[0],sc_dims[0]).reshape(sc_dims[0],1)
            xdim = tile(xdim, (sc_dims[0],1))
            ydim = tile(ydim, (1,sc_dims[1]))

            resc = [ydim,xdim]
            xdim, ydim = None, None
            lon = map_coordinates(lon, resc, mode='nearest')
            lat = map_coordinates(lat, resc, mode='nearest')
        else:
            rescale = scf

    ## run through parameters
    for mi in maps:
        if not maps[mi]: continue

        if mi == 'parameters':
            if rgb_pan_sharpen:
                if map_parameters_pan & mapped:
                    lon = lon_pan * 1.0
                    lon_pan = None
                    lat = lat_pan * 1.0
                    lat_pan = None
                pan_data, lon_pan, lat_pan = None, None, None

            print('Mapping {}'.format(mi))
            if type(parameters) is not list: parameters=[parameters]
            for pid, par in enumerate(parameters):
                pard = None

                ## check if this parameter exists
                if par not in l2w_datasets:
                    print('Parameter {} not in file {}.'.format(par, inputfile))
                    continue
                    
                print('Mapping {}'.format(par))
                ## read data
                data = nc_data(inputfile, par)
                if (rgb_pan_sharpen) & (map_parameters_pan):
                    data = zoom(data, zoom=2, order=1)

                ## rescale data
                if (scf != 1.0) and dataset_rescale:
                    data[isnan(data)] = mask_val
                    data = map_coordinates(data, resc, cval=mask_val)
                    data[data <= int(mask_val)] = nan
                    data[data <= 0] = nan

                data_range = nanpercentile(data, range_percentiles)

                ## get parameter mapping configuration
                if par in psc:
                    pard = copy.deepcopy(psc[par])
                else:
                     tmp = par.split('_')
                     par_generic = '_'.join((tmp[0:-1]+['*']))
                     if par_generic in psc: 
                         pard = copy.deepcopy(psc[par_generic])
                         try: ## add wavelength to generic name
                             wave = int(tmp[len(tmp)-1])
                             pard['name'] = '{} ({} nm)'.format(pard['name'], wave)
                         except:
                             pass
                     else: pard= {'color table':'default', 'min':data_range[0], 'max':data_range[1],
                                  'log': False, 'name':par, 'unit':'', 'parameter':par}

                if pard['color table'] == 'default': pard['color table']='viridis'
                ctfile = "{}/{}/{}.txt".format(ac.config['pp_data_dir'], 'Shared/ColourTables', pard['color table'])

                if os.path.exists(ctfile):
                    from matplotlib.colors import ListedColormap
                    from numpy import loadtxt
                    pard['color table'] = ListedColormap(loadtxt(ctfile)/255.)

                if 'title' not in pard: pard['title']='{} [{}]'.format(pard['name'],pard['unit'])
                if auto_range:
                    pard['min']=data_range[0]
                    pard['max']=data_range[1]

                if isnan(pard['min']): pard['min']=data_range[0]
                if isnan(pard['max']): pard['max']=data_range[1]

                ## outputfile
                outputfile = '{}/{}_{}.png'.format(odir,obase,par)

                if map_title:
                    title = '{} {}/{} {}'.format(pard['name'], sat, sen, stime.strftime('%Y-%m-%d (%H:%M UTC)'))
                else:
                    title = None

                ## use qmap option
                if mapped:
                    range = (pard['min'], pard['max'])
                    if 'limit' in gatts:
                        limit = gatts['limit']

                    if ('xx' not in locals()):
                        xx, yy, m = qmap(data, lon, lat, outputfile=outputfile, title=title, rescale=rescale,
                                           colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge, cmap=pard['color table'],
                                           label=pard['title'], range=range, log = pard['log'], map_fillcolor=map_fillcolor,
                                           limit=limit, dpi=dpi, points=map_points, projection=map_projection,
                                           scalebar=map_scalebar, scalepos=map_scalepos, 
                                           scalecolor=map_scalecolor, scalelen=map_scalelen)                
                    else:
                        xx, yy, m = qmap(data, lon, lat, outputfile=outputfile, title=title, rescale=rescale,
                                           colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge, cmap=pard['color table'],
                                           label=pard['title'], range=range, log = pard['log'], map_fillcolor=map_fillcolor, 
                                           limit=limit, dpi=dpi, points=map_points, projection=map_projection,
                                           scalebar=map_scalebar, scalepos=map_scalepos, 
                                           scalecolor=map_scalecolor, scalelen=map_scalelen, xx=xx, yy=yy, m=m)

                else:
                    import matplotlib.cm as cm
                    from matplotlib.colors import ListedColormap
                    cmap = cm.get_cmap(pard['color table'])
                    cmap.set_bad(map_fillcolor)
                    cmap.set_under(map_fillcolor)

                    if not map_raster:
                        ## set up plot
                        fig = matplotlib.figure.Figure()
                        canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(fig)
                        ax = fig.add_subplot(111)

                        print(pard['min'], pard['max'])

                        if pard['log']:
                            from matplotlib.colors import LogNorm
                            cax = ax.imshow(data, vmin=pard['min'], vmax=pard['max'], cmap=cmap,
                                               norm=LogNorm(vmin=pard['min'], vmax=pard['max']))
                        else:
                            cax = ax.imshow(data, vmin=pard['min'], vmax=pard['max'], cmap=cmap)

                        if map_colorbar:
                            if map_colorbar_orientation == 'vertical':
                                cbar = fig.colorbar(cax, orientation='vertical')
                                cbar.ax.set_ylabel(pard['title'])
                            else:
                                cbar = fig.colorbar(cax, orientation='horizontal')
                                cbar.ax.set_xlabel(pard['title'])

                            if map_title: ax.set_title(title)
                            ax.axis('off')
                            canvas.print_figure(outputfile, dpi=dpi, bbox_inches='tight')
                    else:
                        from PIL import Image
                        ## rescale for mapping
                        if pard['log']:
                            from numpy import log10
                            datasc = datascl(log10(data), dmin=log10(pard['min']), dmax=log10(pard['max']))
                        else:
                            datasc = datascl(data, dmin=pard['min'], dmax=pard['max'])

                        d = cmap(datasc)
                        for wi in (0,1,2):
                            ## convert back to 8 bit channels (not ideal)
                            d_ = datascl(d[:,:,wi], dmin=0, dmax=1)
                            if wi == 0: im = d_
                            else: im = dstack((im,d_))

                        img = Image.fromarray(im)

                        ## output image    
                        img.save(outputfile)

                print('Wrote {}'.format(outputfile))
        else:
            print('Mapping RGB {}'.format(mi))
            ## RGBs
            waves = [float(ds.split('_')[1]) for ds in l2w_datasets if ds[0:4] == mi]
            if len(waves) == 0:
                print('No appropriate datasets found for RGB {} in {}'.format(mi, inputfile))
                continue

            ## read datasets
            for wi, wl in enumerate([red_wl, green_wl, blue_wl]):
                idx, wave = closest_idx(waves, wl)
                cpar = '{}_{}'.format(mi, int(wave))
                ## read data
                data = nc_data(inputfile, cpar)

                if rgb_pan_sharpen:
                    data = zoom(data, zoom=2, order=1)
                    if wi == 0: vis_i = data * 1.0
                    else: vis_i += data
                    if wi == 2:
                        vis_i /= 3
                        pan_i = vis_i/pan_data
                        vis_i = None

                ## rescale data
                if (scf != 1.0) and dataset_rescale:
                    data[isnan(data)] = mask_val
                    data = map_coordinates(data, resc, cval=mask_val)
                    data[data <= int(mask_val)] = nan
                    data[data <= 0] = nan

                ## stack image
                if wi == 0:
                    image = data
                else:
                    image = dstack((image,data))
                
            ## rescale data between 0 and 1
            for wi in (2,1,0):
                if rgb_pan_sharpen: image[:,:,wi] /= pan_i
                image[:,:,wi] = datascl(image[:,:,wi], dmin=rgb_min[wi], dmax=rgb_max[wi])/255.

            par = r'$\rho_{}$'.format(mi[3]) + ' RGB'
            if map_title:
                title = '{} {}/{} {}'.format(par, sat, sen, stime.strftime('%Y-%m-%d (%H:%M UTC)'))
            else:
                title = None

            ## outputfile
            if rgb_pan_sharpen: 
                outputfile = '{}/{}_rgb_{}_pan.png'.format(odir,obase,mi)
            else:
                outputfile = '{}/{}_rgb_{}.png'.format(odir,obase,mi)

            # use qmap option
            if mapped:
                if 'limit' in gatts:
                    limit = gatts['limit']

                if rgb_pan_sharpen:
                    ret = qmap(image, lon_pan, lat_pan, outputfile=outputfile, title=title, rescale=rescale,
                                               colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge,
                                               limit=limit, dpi=dpi, points=map_points, projection=map_projection,
                                               scalebar=map_scalebar, scalepos=map_scalepos, 
                                               scalecolor=map_scalecolor_rgb, scalelen=map_scalelen)      
                    ret = None     
                else:
                    if ('xx' not in locals()):
                        xx, yy, m = qmap(image, lon, lat, outputfile=outputfile, title=title, rescale=rescale,
                                               colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge,
                                               limit=limit, dpi=dpi, points=map_points, projection=map_projection,
                                               scalebar=map_scalebar, scalepos=map_scalepos, 
                                               scalecolor=map_scalecolor_rgb, scalelen=map_scalelen)                
                    else:
                        xx, yy, m = qmap(image, lon, lat, outputfile=outputfile, title=title, rescale=rescale,
                                               colorbar=map_colorbar_orientation, colorbar_edge=map_colorbar_edge,
                                               limit=limit, dpi=dpi, points=map_points, projection=map_projection,
                                               scalebar=map_scalebar, scalepos=map_scalepos, 
                                               scalecolor=map_scalecolor_rgb, scalelen=map_scalelen, xx=xx, yy=yy, m=m)


            else:
                if not map_raster:
                    ## set up plot
                    fig = matplotlib.figure.Figure()
                    canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(fig)
                    ax = fig.add_subplot(111)
                    ax.imshow(image)
                    image = None
                    
                    if map_title: ax.set_title(title)
                    ax.axis('off')
                    canvas.print_figure(outputfile, dpi=dpi, bbox_inches='tight')
                else:
                    from PIL import Image
                    for wi in (0,1,2):
                        # convert again to 8 bit channels (not ideal)
                        data = datascl(image[:,:,wi], dmin=0, dmax=1)
                        if wi == 0:
                            im = data
                        else:
                            im = dstack((im,data))

                    img = Image.fromarray(im)
                    img.save(outputfile)

            print('Wrote {}'.format(outputfile))
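The RGB branch above stretches each channel between fixed reflectance bounds before compositing, and the auto_range option does the analogous thing for single parameters using scene percentiles. A self-contained sketch of a percentile-based per-channel stretch (generic NumPy, not the acolite datascl routine; the bands are synthetic):

import numpy as np

def stretch_channel(band, percentiles=(5, 95)):
    """Linearly rescale a band to 0-1 between robust percentile bounds."""
    lo, hi = np.nanpercentile(band, percentiles)
    return np.clip((band - lo) / (hi - lo), 0.0, 1.0)

rng = np.random.default_rng(4)
bands = [rng.gamma(2.0, 0.05, (100, 100)) for _ in range(3)]   # fake R, G, B reflectances
rgb = np.dstack([stretch_channel(b) for b in bands])           # ready for plt.imshow(rgb)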