Code Example #1
File: generic.py Project: davidandrzej/pandas
    def cumprod(self, axis=None):
        """
        Return cumulative product over requested axis as DataFrame

        Parameters
        ----------
        axis : {0, 1}
            0 for row-wise, 1 for column-wise

        Returns
        -------
        y : DataFrame
        """
        if axis is None:
            axis = self._default_stat_axis
        else:
            axis = self._get_axis_number(axis)

        y = self.values.copy()
        if not issubclass(y.dtype.type, np.int_):
            mask = np.isnan(self.values)
            np.putmask(y, mask, 1.0)
            result = y.cumprod(axis)
            np.putmask(result, mask, np.nan)
        else:
            result = y.cumprod(axis)
        return self._wrap_array(result, self.axes, copy=False)
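The masking trick above, shown as a minimal standalone sketch (only numpy assumed): NaNs are temporarily replaced by 1, the multiplicative identity, so they do not poison the running product, and are restored afterwards.

import numpy as np

a = np.array([[1.0, 2.0, np.nan],
              [3.0, 4.0, 5.0]])
mask = np.isnan(a)
tmp = a.copy()
np.putmask(tmp, mask, 1.0)        # 1 is neutral for a product
out = tmp.cumprod(axis=1)
np.putmask(out, mask, np.nan)     # restore the missing positions
# out -> [[ 1.  2. nan]
#         [ 3. 12. 60.]]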
Code Example #2
File: trigger_fits.py Project: a-r-williamson/pycbc
def cum_fit(distr, xvals, alpha, thresh):
    """
    Integral of the fitted function above a given value (reverse CDF)

    The fitted function is normalized to 1 above threshold

    Parameters
    ----------
    xvals : sequence of floats
        Values where the function is to be evaluated
    alpha : float
        The fitted parameter
    thresh : float
        Threshold value applied to fitted values

    Returns
    -------
    cum_fit : array of floats
        Reverse CDF of fitted function at the requested xvals
    """
    xvals = numpy.array(xvals)
    cum_fit = cum_fndict[distr](xvals, alpha, thresh)
    # set fitted values below threshold to 0
    numpy.putmask(cum_fit, xvals < thresh, 0.)
    return cum_fit
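The thresholding step in isolation, as a hedged sketch; the exponential reverse CDF below is only an illustrative stand-in for whatever cum_fndict[distr] provides.

import numpy as np

def exp_rcdf(xvals, alpha, thresh):
    # reverse CDF of an exponential fit, equal to 1 at the threshold
    return np.exp(-alpha * (xvals - thresh))

xvals = np.array([4.0, 5.0, 6.0, 7.0])
out = exp_rcdf(xvals, alpha=1.0, thresh=5.0)
np.putmask(out, xvals < 5.0, 0.)  # below threshold the fit carries no weight
# out -> [0., 1., 0.3678..., 0.1353...]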
Code Example #3
File: nanops.py Project: X1mengYu/pandas
def nankurt(values, axis=None, skipna=True):
    if not issubclass(values.dtype.type, np.floating):  # dtype.type is a class, so issubclass (not isinstance) is the correct check
        values = values.astype('f8')

    mask = isnull(values)
    count = _get_counts(mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B
    D = (values ** 4).sum(axis) / count - A ** 4 - 6 * B * A * A - 4 * C * A

    B = _zero_out_fperr(B)
    C = _zero_out_fperr(C)
    D = _zero_out_fperr(D)

    result = (((count * count - 1.) * D / (B * B) - 3 * ((count - 1.) ** 2)) /
              ((count - 2.) * (count - 3.)))
    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 4] = np.nan
        return result
    else:
        result = 0 if B == 0 else result
        if count < 4:
            return np.nan
        return result
Code Example #4
File: nanops.py Project: OspreyX/pandas
def nankurt(values, axis=None, skipna=True):

    mask = isnull(values)
    if not is_floating_dtype(values):
        values = values.astype('f8')

    count = _get_counts(mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B
    D = (values ** 4).sum(axis) / count - A ** 4 - 6 * B * A * A - 4 * C * A

    B = _zero_out_fperr(B)
    D = _zero_out_fperr(D)

    if not isinstance(B, np.ndarray):
        # if B is a scalar, check these corner cases first before doing division
        if count < 4:
            return np.nan
        if B == 0:
            return 0

    result = (((count * count - 1.) * D / (B * B) - 3 * ((count - 1.) ** 2)) /
              ((count - 2.) * (count - 3.)))

    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 4] = np.nan

    return result
Code Example #5
File: trigger_fits.py Project: a-r-williamson/pycbc
def fit_fn(distr, xvals, alpha, thresh):
    """
    The fitted function normalized to 1 above threshold

    To normalize to a given total count multiply by the count.

    Parameters
    ----------
    xvals : sequence of floats
        Values where the function is to be evaluated
    alpha : float
        The fitted parameter
    thresh : float
        Threshold value applied to fitted values

    Returns
    -------
    fit : array of floats
        Fitted function at the requested xvals
    """
    xvals = numpy.array(xvals)
    fit = fitfn_dict[distr](xvals, alpha, thresh)
    # set fitted values below threshold to 0
    numpy.putmask(fit, xvals < thresh, 0.)
    return fit
Code Example #6
File: nanops.py Project: X1mengYu/pandas
def _get_values(values, skipna, fill_value=None, fill_value_typ=None, isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
        if necessary copy and mask using the specified fill_value
        copy = True will force the copy """
    values = _values_from_object(values)
    if isfinite:
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype    = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value, fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = com._maybe_upcast_putmask(values, mask, fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)
    return values, mask, dtype
Code Example #7
File: nanops.py Project: X1mengYu/pandas
def nanskew(values, axis=None, skipna=True):
    if not issubclass(values.dtype.type, np.floating):  # dtype.type is a class, so issubclass (not isinstance) is the correct check
        values = values.astype('f8')

    mask = isnull(values)
    count = _get_counts(mask, axis)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B

    # floating point error
    B = _zero_out_fperr(B)
    C = _zero_out_fperr(C)

    result = ((np.sqrt((count ** 2 - count)) * C) /
              ((count - 2) * np.sqrt(B) ** 3))

    if isinstance(result, np.ndarray):
        result = np.where(B == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if B == 0 else result
        if count < 3:
            return np.nan
        return result
Code Example #8
File: generic.py Project: ContinuumIO/pandas
    def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
                   **kwds):
        """
        Percent change over given number of periods

        Parameters
        ----------
        periods : int, default 1
            Periods to shift for forming percent change
        fill_method : str, default 'pad'
            How to handle NAs before computing percent changes
        limit : int, default None
            The number of consecutive NAs to fill before stopping
        freq : DateOffset, timedelta, or offset alias string, optional
            Increment to use from time series API (e.g. 'M' or BDay())

        Returns
        -------
        chg : Series or DataFrame
        """
        if fill_method is None:
            data = self
        else:
            data = self.fillna(method=fill_method, limit=limit)
        rs = data / data.shift(periods=periods, freq=freq, **kwds) - 1
        if freq is None:
            mask = com.isnull(self.values)
            np.putmask(rs.values, mask, np.nan)
        return rs
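A numpy-only sketch of the final re-masking idea (not the pandas implementation itself): after computing returns on forward-filled data, positions that were NA in the original input are forced back to NaN so the filled values do not leak into the result.

import numpy as np

prices = np.array([1.0, np.nan, 3.0, 4.5])
filled = prices.copy()
for i in range(1, len(filled)):            # crude forward fill, for the sketch only
    if np.isnan(filled[i]):
        filled[i] = filled[i - 1]
rs = filled[1:] / filled[:-1] - 1
np.putmask(rs, np.isnan(prices[1:]), np.nan)   # original NAs stay NA
# rs -> [nan, 2. , 0.5]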
Code Example #9
File: generic.py Project: ContinuumIO/pandas
    def cummin(self, axis=None, skipna=True):
        """
        Return DataFrame of cumulative min over requested axis.

        Parameters
        ----------
        axis : {0, 1}
            0 for row-wise, 1 for column-wise
        skipna : boolean, default True
            Exclude NA/null values. If an entire row/column is NA, the result
            will be NA

        Returns
        -------
        y : DataFrame
        """
        if axis is None:
            axis = self._default_stat_axis
        else:
            axis = self._get_axis_number(axis)

        y = self.values.copy()
        if not issubclass(y.dtype.type, np.integer):
            mask = np.isnan(self.values)

            if skipna:
                np.putmask(y, mask, np.inf)

            result = np.minimum.accumulate(y, axis)

            if skipna:
                np.putmask(result, mask, np.nan)
        else:
            result = np.minimum.accumulate(y,axis)
        return self._wrap_array(result, self.axes, copy=False)
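The skipna trick above in a compact sketch, assuming a float array: NaNs are parked at +inf so they can never win the running minimum, then put back at the end.

import numpy as np

y = np.array([3.0, np.nan, 1.0, 2.0])
mask = np.isnan(y)
np.putmask(y, mask, np.inf)            # +inf never becomes the minimum
result = np.minimum.accumulate(y)
np.putmask(result, mask, np.nan)       # restore the holes
# result -> [ 3. nan  1.  1.]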
Code Example #10
File: nanops.py Project: Shruti29/pandas
def nanall(values, axis=None, skipna=True):
    mask = isnull(values)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, True)
    return values.all(axis)
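The same idea as a short standalone sketch: missing entries are overwritten with True, the identity element of a logical AND, so only real values can make all() fail.

import numpy as np

values = np.array([1.0, np.nan, 0.0])
mask = np.isnan(values)
vals = values.copy()
np.putmask(vals, mask, True)   # the NaN can no longer decide the outcome
print(vals.all())              # False, but only because of the genuine 0.0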
Code Example #11
File: frame.py Project: hhamalai/pandas
    def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
                       limit=None):
        if level is not None:
            raise Exception('Reindex by level not supported for sparse')

        if self.index.equals(index):
            if copy:
                return self.copy()
            else:
                return self

        if len(self.index) == 0:
            return SparseDataFrame(index=index, columns=self.columns)

        indexer = self.index.get_indexer(index, method, limit=limit)
        indexer = com._ensure_platform_int(indexer)
        mask = indexer == -1
        need_mask = mask.any()

        new_series = {}
        for col, series in self.iteritems():
            values = series.values
            new = values.take(indexer)

            if need_mask:
                np.putmask(new, mask, fill_value)

            new_series[col] = new

        return SparseDataFrame(new_series, index=index, columns=self.columns,
                               default_fill_value=self.default_fill_value)
Code Example #12
File: groupby.py Project: gwtaylor/pandas
    def _make_labels(self):
        if self._was_factor:  # pragma: no cover
            raise Exception('Should not call this method grouping by level')
        else:
            values = self.grouper
            if values.dtype != np.object_:
                values = values.astype('O')

            # khash
            rizer = lib.Factorizer(len(values))
            labels, counts = rizer.factorize(values, sort=False)

            uniques = Index(rizer.uniques, name=self.name)
            if self.sort and len(counts) > 0:
                sorter = uniques.argsort()
                reverse_indexer = np.empty(len(sorter), dtype=np.int32)
                reverse_indexer.put(sorter, np.arange(len(sorter)))

                mask = labels < 0
                labels = reverse_indexer.take(labels)
                np.putmask(labels, mask, -1)

                uniques = uniques.take(sorter)
                counts = counts.take(sorter)

            self._labels = labels
            self._group_index = uniques
            self._counts = counts
Code Example #13
File: strings.py Project: antoinelacroix/pandas
def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
    from pandas.core.series import Series

    if not len(arr):
        return np.ndarray(0, dtype=dtype)

    if isinstance(arr, Series):
        arr = arr.values
    if not isinstance(arr, np.ndarray):
        arr = np.asarray(arr, dtype=object)
    if na_mask:
        mask = isnull(arr)
        try:
            result = lib.map_infer_mask(arr, f, mask.view(np.uint8))
        except (TypeError, AttributeError):
            def g(x):
                try:
                    return f(x)
                except (TypeError, AttributeError):
                    return na_value
            return _map(g, arr, dtype=dtype)
        if na_value is not np.nan:
            np.putmask(result, mask, na_value)
            if result.dtype == object:
                result = lib.maybe_convert_objects(result)
        return result
    else:
        return lib.map_infer(arr, f)
Code Example #14
File: merge.py Project: 17705724576-M13Kd/pandas
def _factorize_keys(lk, rk, sort=True):
    if com._is_int_or_datetime_dtype(lk) and com._is_int_or_datetime_dtype(rk):
        klass = lib.Int64Factorizer
        lk = com._ensure_int64(lk)
        rk = com._ensure_int64(rk)
    else:
        klass = lib.Factorizer
        lk = com._ensure_object(lk)
        rk = com._ensure_object(rk)

    rizer = klass(max(len(lk), len(rk)))

    llab = rizer.factorize(lk)
    rlab = rizer.factorize(rk)

    count = rizer.get_count()

    if sort:
        uniques = rizer.uniques.to_array()
        llab, rlab = _sort_labels(uniques, llab, rlab)

    # NA group
    lmask = llab == -1; lany = lmask.any()
    rmask = rlab == -1; rany = rmask.any()

    if lany or rany:
        if lany:
            np.putmask(llab, lmask, count)
        if rany:
            np.putmask(rlab, rmask, count)
        count += 1

    return llab, rlab, count
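The NA-group handling on its own, as a hedged toy example: labels of -1 (keys with no observed group) are rerouted to a brand-new group id so the join machinery can still align them.

import numpy as np

llab = np.array([0, 1, -1, 2, -1], dtype=np.int64)
count = 3                           # number of observed key groups
lmask = llab == -1
if lmask.any():
    np.putmask(llab, lmask, count)  # missing keys all land in group 3
    count += 1
# llab -> [0 1 3 2 3], count -> 4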
Code Example #15
File: algorithms.py Project: SocialQ/pandas
def factorize(values, sort=False, order=None, na_sentinel=-1):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : sequence
    sort :
    order :

    Returns
    -------
    """
    hash_klass, values = _get_hash_table_and_cast(values)

    uniques = []
    table = hash_klass(len(values))
    labels, counts = table.get_labels(values, uniques, 0, na_sentinel)

    uniques = com._asarray_tuplesafe(uniques)
    if sort and len(counts) > 0:
        sorter = uniques.argsort()
        reverse_indexer = np.empty(len(sorter), dtype=np.int32)
        reverse_indexer.put(sorter, np.arange(len(sorter)))

        mask = labels < 0
        labels = reverse_indexer.take(labels)
        np.putmask(labels, mask, -1)

        uniques = uniques.take(sorter)
        counts = counts.take(sorter)

    return labels, uniques, counts
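A toy walk-through of the relabelling step (a sketch, not the pandas internals): after sorting the uniques, old codes are mapped through a reverse indexer, and putmask restores the -1 sentinel that the take() call would otherwise clobber.

import numpy as np

uniques = np.array(['b', 'c', 'a'])
labels = np.array([0, 1, -1, 2, 0])          # -1 marks a missing value

sorter = uniques.argsort()                   # order that sorts the uniques
reverse_indexer = np.empty(len(sorter), dtype=np.intp)
reverse_indexer.put(sorter, np.arange(len(sorter)))

mask = labels < 0
labels = reverse_indexer.take(labels)        # -1 wraps around and picks garbage
np.putmask(labels, mask, -1)                 # so put the sentinel back
uniques = uniques.take(sorter)
# labels -> [ 1  2 -1  0  1], uniques -> ['a' 'b' 'c']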
Code Example #16
File: gkcLinear.py Project: xyuan/gkc
def getFrequency(T, D, start, stop, dir='Y'):
  import scipy.ndimage
  
  freq_list   = []
  N      = len(D[:,0]) 
    
  print "Fitting from T : ", T[start], " - ", T[stop]
 
  # Note We assume constant time-steps !
  for n in range(N): 
     
    time_series = D[n,start:stop]
    FS = np.fft.rfft(time_series) #np.sin(time_series))
    # Get Maximum Frequency
    m     = np.argmax(abs(FS))
    fftfreq = np.fft.fftfreq(len(time_series), d = (T[-10]-T[-11])) 
    
    abs_freq =  2.*np.pi*fftfreq[m]

    # Needs sqrt(2.) from velocity normalization
    
    # Get sign of frequency by taking gradient of phase shift (how to deal with jump?)
    time_series = scipy.ndimage.gaussian_filter(time_series, 0.01)
    grad = np.gradient(time_series, T[-10]-T[-11])
    # remove jump values
    np.putmask(grad, abs(grad) > 1.05*abs_freq, 0.)
    np.putmask(grad, abs(grad) < 0.95*abs_freq, 0.)
    sig = -np.sign(sum(grad))

    freq_list.append(sig * abs_freq)

  print "Getting Frequency from T = ", T[start], " to T = " , T[stop]

  return np.array(freq_list)
Code Example #17
File: apply_segments.py Project: cmertes/sample_code
def remapRaster(infile, out_file, lookup):
        '''remap raster values to those in lookup table'''
        inmap = gdal.Open(infile)
        rows = inmap.RasterYSize
        cols = inmap.RasterXSize
        map_arr = inmap.ReadAsArray()

        #remap values
        remap_dict = df.getDictfromCSV(lookup,'\t',1,0)
        remap_dict[0]=2000 #ag
        remap_dict[255]=32767 #nodata
        map_out = map_arr.astype(np.int16)
        print('input map labels', np.unique(map_out))
        for r in remap_dict:
                print('reclassifying', r, ': ', remap_dict[r])
                outval=int(remap_dict[r])
                temp=np.equal(map_out, int(r))
                np.putmask(map_out, temp, int(remap_dict[r]))
                temp=None
        print('output map labels', np.unique(map_out))
        #output raster
        driver=inmap.GetDriver()
        outDs = driver.Create(out_file, cols, rows, 1, GDT_Int16)
        outDs.SetGeoTransform(inmap.GetGeoTransform())
        outDs.SetProjection(inmap.GetProjection())
        outband = outDs.GetRasterBand(1)

        outband.WriteArray(map_out, 0 ,0)
        outband.SetNoDataValue(32767)
        outband.FlushCache()
Code Example #18
File: closest_element.py Project: jongman/typ
def binary_search_np(A, B):
    # assume A and B are numpy arrays
    idx2 = np.minimum(len(A) - 1, np.searchsorted(A, B)) 
    idx1 = np.maximum(0, idx2 - 1)
    idx2_is_better = np.abs(A[idx1] - B) > np.abs(A[idx2] - B)
    np.putmask(idx1, idx2_is_better, idx2)
    return A[idx1]
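A short usage example, reusing binary_search_np from above (A must be sorted ascending): for each element of B it returns the closest element of A.

import numpy as np

A = np.array([1.0, 4.0, 9.0, 16.0])
B = np.array([2.0, 10.0, 20.0])
print(binary_search_np(A, B))   # [ 1.  9. 16.]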
Code Example #19
File: lib_operations.py Project: revoltek/losoto
def normalize_phase(phase):
    """
    Normalize phase to the range [-pi, pi].
    
    Parameters
    ----------
    phase : array of float
        Phase to normalize.
    
    Returns
    -------
    array of float
        Normalized phases.
    """

    # Convert to range [-2*pi, 2*pi].
    out = np.fmod(phase, 2.0 * np.pi)
    # Remove nans
    nans = np.isnan(out)
    np.putmask(out, nans, 0)
    # Convert to range [-pi, pi]
    out[out < -np.pi] += 2.0 * np.pi
    out[out > np.pi] -= 2.0 * np.pi
    # Put nans back
    np.putmask(out, nans, np.nan)
    return out
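A quick usage sketch of normalize_phase as defined above: values outside [-pi, pi] are wrapped back into the interval and NaNs survive untouched.

import numpy as np

phase = np.array([4.0, -7.0, np.nan, 0.5])
print(normalize_phase(phase))
# approximately [-2.2832 -0.7168     nan  0.5   ]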
Code Example #20
File: ppgplot_spb.py Project: vrooje/galaxyzoo2
def ave_array_2d(x, y, z, nxbin, xlow, xhigh, nybin, ylow, yhigh,
                 completeness=None):
    nx = len(x)
    ny = len(y)
    if nx != ny:
        print('Error: len(x) != len(y)')
        return
    xstep = float(xhigh-xlow)/nxbin
    ystep = float(yhigh-ylow)/nybin
    x_bin = N.arange(nxbin) * xstep + xlow + xstep/2.0
    y_bin = N.arange(nybin) * ystep + ylow + ystep/2.0
    d_bin = N.zeros((nybin, nxbin), N.float)
    z_bin = N.zeros((nybin, nxbin), N.float)
    for k in range(nx):
        jbin_index = int((x[k] - xlow)/xstep)
        ibin_index = int((y[k] - ylow)/ystep)
        if completeness is None:
            c = 1
        else:
            c = completeness[k]
        if 0 <= jbin_index < nxbin and 0 <= ibin_index < nybin:
            d_bin[ibin_index, jbin_index] += 1.0/c
            z_bin[ibin_index, jbin_index] += z[k]
    z_bin /= d_bin
    N.putmask(z_bin, d_bin < 1, 0.0)
    return x_bin, y_bin, z_bin
Code Example #21
File: test_setitem.py Project: bwignall/pandas
    def test_frame_getitem_setitem_boolean(
            self, multiindex_dataframe_random_data):
        frame = multiindex_dataframe_random_data
        df = frame.T.copy()
        values = df.values

        result = df[df > 0]
        expected = df.where(df > 0)
        tm.assert_frame_equal(result, expected)

        df[df > 0] = 5
        values[values > 0] = 5
        tm.assert_almost_equal(df.values, values)

        df[df == 5] = 0
        values[values == 5] = 0
        tm.assert_almost_equal(df.values, values)

        # a df that needs alignment first
        df[df[:-1] < 0] = 2
        np.putmask(values[:-1], values[:-1] < 0, 2)
        tm.assert_almost_equal(df.values, values)

        with pytest.raises(TypeError, match='boolean values only'):
            df[df * 0] = 2
Code Example #22
File: family.py Project: scottpiraino/statsmodels
    def deviance(self, Y, mu, scale=1.):
        '''
        Poisson deviance function

        Parameters
        ----------
        Y : array-like
            Endogenous response variable
        mu : array-like
            Fitted mean response variable
        scale : float, optional
            An optional scale argument

        Returns
        -------
        deviance : float
            The deviance function at (Y,mu) as defined below.

        Notes
        -----
        If a constant term is included it is defined as

        :math:`deviance = 2*\\sum_{i}(Y*\\log(Y/\\mu))`
        '''
        if np.any(Y==0):
            retarr = np.zeros(Y.shape)
            Ymu = Y/mu
            mask = Ymu != 0
            YmuMasked = Ymu[mask]
            Ymasked = Y[mask]
            np.putmask(retarr, mask, Ymasked*np.log(YmuMasked)/scale)
            return 2*np.sum(retarr)
        else:
            return 2*np.sum(Y*np.log(Y/mu))/scale
Code Example #23
File: featsel.py Project: bpartridge/PyML
def usefullness(data, targetClass, otherClass = None, **args) :
    '''A feature score for discrete data
    optional arguments:
    threshold
    fraction
    '''

    if 'threshold' in args :
        threshold = args['threshold']
    else :
        threshold = 5
    if 'fraction' in args :
        fraction = args['fraction']
    else :
        fraction = 0.0

    Y, targetClassSize, otherClassSize, otherI, feature = parseArgs(
        data, targetClass, otherClass, **args)

    threshold = max(threshold, fraction * float(targetClassSize))        

    s1 = featureCount(data, targetClass=targetClass, Y=Y, feature=feature)

    s2 = featureCount(data, I = otherI, Y=Y,
                      feature=feature) / float(otherClassSize)

    s2 = 1 - s2

    numpy.putmask(s2, numpy.less(s1, threshold), 0.0)

    return s2
Code Example #24
File: lobos_image.py Project: varenius/lofar-lb
def get_closure_phase(infile='L401323_SB349_uv.dppp.MS',\
                 triangle = ['TS001','DE601HBA','DE605HBA']):
    a=inspect.stack()
    stacklevel=0
    for k in range(len(a)):
        if (string.find(a[k][1],'ipython console')>0):
            stacklevel=k
    myf=sys._getframe(stacklevel).f_globals
    myf['__last_task']='mytask'
    myf['taskname']='mytask'
    tb=myf['tb']
    oroot = infile.split('uv')[0]
    for lfile in np.sort(glob.glob(oroot+'*ms')):
        os.system('ms2uvfits in='+lfile+' out='+lfile.replace('ms','fits')+' writesyscal=F')
        if lfile == infile:
            continue
        tb.open(lfile+'/ANTENNA')
        names = tb.getcol('NAME')
        trnum = []
        for itr in range(3):
            trnum.append(np.argwhere(names==triangle[itr])[0][0])
        tb.close()
        trnum.sort()
        tb.open(lfile)
        ant1 = tb.getcol('ANTENNA1')
        ant2 = tb.getcol('ANTENNA2')
        data = tb.getcol('DATA')
        ph12 = +np.angle(data[0,0,(ant1==trnum[0])&(ant2==trnum[1])])
        ph23 = +np.angle(data[0,0,(ant1==trnum[1])&(ant2==trnum[2])])
        ph31 = -np.angle(data[0,0,(ant1==trnum[0])&(ant2==trnum[2])])
        clph = ph12+ph23+ph31
        np.putmask(clph,clph>np.pi,clph-2.*np.pi)
        np.putmask(clph,clph<-np.pi,clph+2.*np.pi)
#        np.savetxt(lfile.replace('ms','txt'),np.unwrap(clph))
        np.savetxt(lfile.replace('ms','txt'),clph)
Code Example #25
File: frame.py Project: bshanks/pandas
    def _reindex_index(self, index, method, copy):
        if self.index.equals(index):
            if copy:
                return self.copy()
            else:
                return self

        if len(self.index) == 0:
            return SparseDataFrame(index=index, columns=self.columns)

        indexer = self.index.get_indexer(index, method)
        mask = indexer == -1
        need_mask = mask.any()

        new_series = {}
        for col, series in self.iteritems():
            values = series.values
            new = values.take(indexer)

            if need_mask:
                np.putmask(new, mask, nan)

            new_series[col] = new

        return SparseDataFrame(new_series, index=index, columns=self.columns,
                               default_fill_value=self.default_fill_value)
Code Example #26
File: series.py Project: willgrass/pandas
    def map(self, arg):
        """
        Map values of Series using input correspondence (which can be
        a dict, Series, or function).

        Parameters
        ----------
        arg : function, dict, or Series

        Returns
        -------
        y : Series
            same index as caller
        """
        if isinstance(arg, (dict, Series)):
            if isinstance(arg, dict):
                arg = Series(arg)

            indexer, mask = tseries.getMergeVec(self, arg.index.indexMap)

            newValues = arg.view(np.ndarray).take(indexer)
            np.putmask(newValues, ~mask, np.nan)  # ~mask (not -mask): entries with no match become NaN

            newSer = Series(newValues, index=self.index)
            return newSer
        else:
            return Series([arg(x) for x in self], index=self.index)
Code Example #27
File: perf.py Project: ychaim/tia
def returns(prices, method='simple', periods=1, fill_method='pad', limit=None, freq=None):
    """
     compute the returns for the specified prices.
     method: [simple,compound,log], compound is log
    """
    if method not in ('simple', 'compound', 'log'):
        raise ValueError("Invalid method type. Valid values are ('simple', 'compound')")

    if method == 'simple':
        return prices.pct_change(periods=periods, fill_method=fill_method, limit=limit, freq=freq)
    else:
        if freq is not None:
            raise NotImplementedError("TODO: implement this logic if needed")

        if isinstance(prices, pd.Series):
            if fill_method is None:
                data = prices
            else:
                data = prices.fillna(method=fill_method, limit=limit)

            data = np.log(data / data.shift(periods=periods))
            mask = pd.isnull(prices.values)
            np.putmask(data.values, mask, np.nan)
            return data
        else:
            return pd.DataFrame(
                {name: returns(col, method, periods, fill_method, limit, freq) for name, col in prices.iteritems()},
                columns=prices.columns,
                index=prices.index)
Code Example #28
File: featsel.py Project: bpartridge/PyML
def golub(data, targetClass, otherClass, **args) :
    '''The Golub feature score:
    s = (mu1 - mu2) / sqrt(sigma1^2 + sigma2^2)
    '''

    if 'Y' in args :
        Y = args['Y']
        targetClassSize = numpy.sum(numpy.equal(Y, targetClass))
        otherClassSize = numpy.sum(numpy.equal(Y, otherClass))        
    else :
        Y = None
        targetClassSize = data.labels.classSize[targetClass] 
        otherClassSize = data.labels.classSize[otherClass]
    
    m1 = numpy.array(featureMean(data, targetClass, Y))
    m2 = numpy.array(featureMean(data, otherClass, Y))
    s1 = numpy.array(featureStd(data, targetClass, Y))
    s2 = numpy.array(featureStd(data, otherClass, Y))

    s = numpy.sqrt(s1**2 + s2**2)
    m = (m1 + m2) / 2.0

    # perfect features will have s[i] = 0, so need to take care of that:
    numpy.putmask(s, numpy.equal(s, 0), m)
    # features that are zero will still have s[i] = 0 so :
    numpy.putmask(s, numpy.equal(s, 0) ,1)
    
    g = (m1 - m2) / s
    
    return g
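The zero-guard pattern used above, isolated as a hedged sketch: denominators that are exactly zero are patched in two passes before the division, so constant ("perfect") features cannot produce inf or NaN.

import numpy as np

m1 = np.array([2.0, 5.0, 1.0])
m2 = np.array([1.0, 5.0, 0.0])
s = np.array([0.5, 0.0, 0.0])                   # pooled std; zeros would blow up below

np.putmask(s, np.equal(s, 0), (m1 + m2) / 2.0)  # first fall back to the mean
np.putmask(s, np.equal(s, 0), 1.0)              # then to 1 if the mean is zero too
g = (m1 - m2) / s
# g -> [2. 0. 2.]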
Code Example #29
File: maputils.py Project: squireg/tcrm
def makeGridDomain(cLon, cLat, minLon, maxLon, minLat, maxLat, 
                   margin=2, resolution=0.01):
    """
    Generate a grid of the distance and angle of a grid of points
    surrounding a storm centre given the location of the storm.
    The grid margin and grid size can be set in configuration files.
    xMargin, yMargin and gridSize are in degrees


    """
    if (type(cLon)==list or type(cLat)==list or 
        type(cLon)==np.ndarray or type(cLat)==np.ndarray):
        raise TypeError, "Input values must be scalar values"
    gridSize = int(resolution * 1000)
    minLon_ = int(1000 * (minLon)) - int(1000 * margin)
    maxLon_ = int(1000 * (maxLon)) + int(1000 * margin) + 1
    minLat_ = int(1000 * (minLat)) - int(1000 * margin)
    maxLat_ = int(1000 * (maxLat)) + int(1000 * margin) + 1

    xGrid = np.array(np.arange(minLon_, maxLon_, gridSize), dtype=int)
    yGrid = np.array(np.arange(minLat_, maxLat_, gridSize), dtype=int)

    R = gridLatLonDist(cLon, cLat, xGrid / 1000., yGrid / 1000.)
    np.putmask(R, R==0, 1e-30)
    theta = np.pi / 2. - gridLatLonBear(cLon, cLat, 
                                        xGrid / 1000., yGrid / 1000.)
    return R, theta
Code Example #30
File: utils.py Project: yoannMoreau/Evapo_GFS
def computeDailyMean(dicoBand,nbBandByDay,typeData):

    def meanCalc(values):
        return np.nanmean(values)

    mean={}
    footprint = np.array([[0,1,0],
                          [1,0,1],
                          [0,1,0]])
    
    for i in range(0,len(dicoBand.keys())/nbBandByDay):
        maxRange=nbBandByDay+i*nbBandByDay
        # we do not take the last band... corresponding to 00-->3h
        for j in range (i*nbBandByDay,maxRange):
            if "array" in locals():
                array=array+dicoBand.items()[j][1]
                np.putmask(dicoBand.items()[j][1], dicoBand.items()[j][1]==0, 0)
                mask=mask+(dicoBand.items()[j][1] > 0).astype(int)
            else:
                array=dicoBand.items()[j][1]
                np.putmask(dicoBand.items()[j][1], dicoBand.items()[j][1]==0, 0)
                mask=(dicoBand.items()[j][1] > 0).astype(int)

        mean[i]=array
        del array

        # using the nanmean function --> much simpler

        mean[i]=mean[i]/mask
        indices = np.where(np.isnan(mean[i]))
        results = ndimage.generic_filter(mean[i], meanCalc, footprint=footprint)
        for row, col in zip(*indices):
            mean[i][row,col] = results[row,col]    
    
    return mean
Code Example #31
File: scaling.py Project: josh200501/orange3
    def set_data(self, data, **args):
        if args.get("skipIfSame", 1):
            if checksum(data) == checksum(self.raw_data):
                return

        self.domain_data_stat = []
        self.attr_values = {}
        self.original_data = None
        self.scaled_data = None
        self.no_jittering_scaled_data = None
        self.valid_data_array = None

        self.raw_data = None
        self.have_data = False
        self.data_has_class = False
        self.data_has_continuous_class = False
        self.data_has_discrete_class = False
        self.data_class_name = None
        self.data_domain = None
        self.data_class_index = None

        if data is None:
            return
        full_data = data
        self.raw_data = data

        len_data = data and len(data) or 0

        self.attribute_names = [attr.name for attr in full_data.domain]
        self.attribute_name_index = dict([
            (full_data.domain[i].name, i) for i in range(len(full_data.domain))
        ])
        self.attribute_flip_info = {}

        self.data_domain = full_data.domain
        self.data_has_class = bool(full_data.domain.class_var)
        self.data_has_continuous_class = full_data.domain.has_continuous_class
        self.data_has_discrete_class = full_data.domain.has_discrete_class

        self.data_class_name = self.data_has_class and full_data.domain.class_var.name
        if self.data_has_class:
            self.data_class_index = self.attribute_name_index[
                self.data_class_name]
        self.have_data = bool(self.raw_data and len(self.raw_data) > 0)

        self.domain_data_stat = getCached(full_data, DomainBasicStats,
                                          (full_data, ))

        sort_values_for_discrete_attrs = args.get(
            "sort_values_for_discrete_attrs", 1)

        for index in range(len(full_data.domain)):
            attr = full_data.domain[index]
            if attr.is_discrete:
                self.attr_values[attr.name] = [0, len(attr.values)]
            elif attr.is_continuous:
                self.attr_values[attr.name] = [
                    self.domain_data_stat[index].min,
                    self.domain_data_stat[index].max
                ]

        if 'no_data' in args:
            return

        # the original_data, no_jittering_scaled_data and validArray are arrays
        # that we can cache so that other visualization widgets don't need to
        # compute it. The scaled_data on the other hand has to be computed for
        # each widget separately because of different
        # jitter_continuous and jitter_size values
        if getCached(data, "visualizationData"):
            self.original_data, self.no_jittering_scaled_data, self.valid_data_array = getCached(
                data, "visualizationData")
        else:
            no_jittering_data = np.c_[full_data.X, full_data.Y].T
            valid_data_array = ~np.isnan(no_jittering_data)
            original_data = no_jittering_data.copy()

            for index in range(len(data.domain)):
                attr = data.domain[index]
                if attr.is_discrete:
                    # see if the values for discrete attributes have to be resorted
                    variable_value_indices = get_variable_value_indices(
                        data.domain[index], sort_values_for_discrete_attrs)
                    if 0 in [
                            i == variable_value_indices[attr.values[i]]
                            for i in range(len(attr.values))
                    ]:
                        # make the array contiguous, otherwise the putmask
                        # function does not work
                        line = no_jittering_data[index].copy()
                        indices = [
                            np.where(line == val, 1, 0)
                            for val in range(len(attr.values))
                        ]
                        for i in range(len(attr.values)):
                            np.putmask(line, indices[i],
                                       variable_value_indices[attr.values[i]])
                        no_jittering_data[
                            index] = line  # save the changed array
                        original_data[
                            index] = line  # reorder also the values in the original data
                    no_jittering_data[index] = (
                        (no_jittering_data[index] * 2.0 + 1.0) /
                        float(2 * len(attr.values)))

                elif attr.is_continuous:
                    diff = self.domain_data_stat[
                        index].max - self.domain_data_stat[
                            index].min or 1  # if all values are the same then prevent division by zero
                    no_jittering_data[index] = (
                        no_jittering_data[index] -
                        self.domain_data_stat[index].min) / diff

            self.original_data = original_data
            self.no_jittering_scaled_data = no_jittering_data
            self.valid_data_array = valid_data_array

        if data:
            setCached(data, "visualizationData",
                      (self.original_data, self.no_jittering_scaled_data,
                       self.valid_data_array))

        # compute the scaled_data arrays
        scaled_data = self.no_jittering_scaled_data

        # Random generators for jittering
        random = np.random.RandomState(seed=self.jitter_seed)
        rand_seeds = random.random_integers(0,
                                            2**30 - 1,
                                            size=len(data.domain))
        for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
            # Need to use a different seed for each feature
            random = np.random.RandomState(seed=rseed)
            attr = data.domain[index]
            if attr.is_discrete:
                scaled_data[index] += (self.jitter_size / (50.0 * max(1, len(attr.values)))) * \
                                      (random.rand(len(full_data)) - 0.5)

            elif attr.is_continuous and self.jitter_continuous:
                scaled_data[index] += self.jitter_size / 50.0 * (
                    0.5 - random.rand(len(full_data)))
                scaled_data[index] = np.absolute(
                    scaled_data[index])  # fix values below zero
                ind = np.where(scaled_data[index] > 1.0, 1,
                               0)  # fix values above 1
                np.putmask(scaled_data[index], ind,
                           2.0 - np.compress(ind, scaled_data[index]))

        self.scaled_data = scaled_data[:, :len_data]
Code Example #32
def mvstdnormcdf(lower, upper, corrcoef, **kwds):
    '''standardized multivariate normal cumulative distribution function

    This is a wrapper for scipy.stats.kde.mvn.mvndst which calculates
    a rectangular integral over a standardized multivariate normal
    distribution.

    This function assumes standardized scale, that is the variance in each dimension
    is one, but correlation can be arbitrary, covariance = correlation matrix

    Parameters
    ----------
    lower, upper : array_like, 1d
       lower and upper integration limits with length equal to the number
       of dimensions of the multivariate normal distribution. It can contain
       -np.inf or np.inf for open integration intervals
    corrcoef : float or array_like
       specifies correlation matrix in one of three ways, see notes
    optional keyword parameters to influence integration
        * maxpts : int, maximum number of function values allowed. This
             parameter can be used to limit the time. A sensible
             strategy is to start with `maxpts` = 1000*N, and then
             increase `maxpts` if ERROR is too large.
        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral


    Notes
    -----
    The correlation matrix corrcoef can be given in 3 different ways
    If the multivariate normal is two-dimensional then only the
    correlation coefficient needs to be provided.
    For general dimension the correlation matrix can be provided either
    as a one-dimensional array of the upper triangular correlation
    coefficients stacked by rows, or as full square correlation matrix

    See Also
    --------
    mvnormcdf : cdf of multivariate normal distribution without
        standardization

    Examples
    --------

    >>> print(mvstdnormcdf([-np.inf,-np.inf], [0.0,np.inf], 0.5))
    0.5
    >>> corr = [[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0], [0.0,0.0,0.0], corr, abseps=1e-6))
    0.166666399198
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0],[0.0,0.0,0.0],corr, abseps=1e-8))
    something wrong completion with ERROR > EPS and MAXPTS function values used;
                        increase MAXPTS to decrease ERROR; 1.048330348e-006
    0.166666546218
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0],[0.0,0.0,0.0], corr, \
                            maxpts=100000, abseps=1e-8))
    0.166666588293

    '''
    n = len(lower)
    #don't know if converting to array is necessary,
    #but it makes ndim check possible
    lower = np.array(lower)
    upper = np.array(upper)
    corrcoef = np.array(corrcoef)

    correl = np.zeros(int(n*(n-1)/2.0))  #dtype necessary?

    if (lower.ndim != 1) or (upper.ndim != 1):
        raise ValueError('can handle only 1D bounds')
    if len(upper) != n:
        raise ValueError('bounds have different lengths')
    if n==2 and corrcoef.size==1:
        correl = corrcoef
        #print 'case scalar rho', n
    elif corrcoef.ndim == 1 and len(corrcoef) == n*(n-1)/2.0:
        #print 'case flat corr', corrcoeff.shape
        correl = corrcoef
    elif corrcoef.shape == (n,n):
        #print 'case square corr',  correl.shape
        correl = corrcoef[np.tril_indices(n, -1)]
#        for ii in range(n):
#            for jj in range(ii):
#                correl[ jj + ((ii-2)*(ii-1))/2] = corrcoef[ii,jj]
    else:
        raise ValueError('corrcoef has incorrect dimension')

    if 'maxpts' not in kwds:
        if n >2:
            kwds['maxpts'] = 10000*n

    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = 2.0*np.ones(n)

    np.putmask(infin,lowinf,0)# infin.putmask(0,lowinf)
    np.putmask(infin,uppinf,1) #infin.putmask(1,uppinf)
    #this has to be last
    np.putmask(infin,lowinf*uppinf,-1)

##    #remove infs
##    np.putmask(lower,lowinf,-100)# infin.putmask(0,lowinf)
##    np.putmask(upper,uppinf,100) #infin.putmask(1,uppinf)

    #print lower,',',upper,',',infin,',',correl
    #print correl.shape
    #print kwds.items()
    error, cdfvalue, inform = scipy.stats.kde.mvn.mvndst(lower,upper,infin,correl,**kwds)
    if inform:
        print('something wrong', informcode[inform], error)
    return cdfvalue
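A small sketch of just the infin bookkeeping: mvndst expects one integer code per dimension (0 = lower limit infinite, 1 = upper limit infinite, 2 = both limits finite, -1 = both infinite), and the three putmask calls build that vector from the bounds. The ordering matters: the "both infinite" case is written last so it overrides the other two.

import numpy as np

lower = np.array([-np.inf, -1.0, -np.inf])
upper = np.array([0.0, np.inf, np.inf])

lowinf = np.isneginf(lower)
uppinf = np.isposinf(upper)
infin = 2.0 * np.ones(len(lower))
np.putmask(infin, lowinf, 0)
np.putmask(infin, uppinf, 1)
np.putmask(infin, lowinf * uppinf, -1)
# infin -> [ 0.  1. -1.]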
Code Example #33
File: boolean.py Project: warrenferns/pandas
    def all(self, *, skipna: bool = True, **kwargs):
        """
        Return whether all elements are True.

        Returns True unless there is at least one element that is False.
        By default, NAs are skipped. If ``skipna=False`` is specified and
        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
        is used as for logical operations.

        Parameters
        ----------
        skipna : bool, default True
            Exclude NA values. If the entire array is NA and `skipna` is
            True, then the result will be True, as for an empty array.
            If `skipna` is False, the result will still be False if there is
            at least one element that is False, otherwise NA will be returned
            if there are NA's present.
        **kwargs : any, default None
            Additional keywords have no effect but might be accepted for
            compatibility with NumPy.

        Returns
        -------
        bool or :attr:`pandas.NA`

        See Also
        --------
        numpy.all : Numpy version of this method.
        BooleanArray.any : Return whether any element is True.

        Examples
        --------
        The result indicates whether all elements are True (and by default
        skips NAs):

        >>> pd.array([True, True, pd.NA]).all()
        True
        >>> pd.array([True, False, pd.NA]).all()
        False
        >>> pd.array([], dtype="boolean").all()
        True
        >>> pd.array([pd.NA], dtype="boolean").all()
        True

        With ``skipna=False``, the result can be NA if this is logically
        required (whether ``pd.NA`` is True or False influences the result):

        >>> pd.array([True, True, pd.NA]).all(skipna=False)
        <NA>
        >>> pd.array([True, False, pd.NA]).all(skipna=False)
        False
        """
        kwargs.pop("axis", None)
        nv.validate_all((), kwargs)

        values = self._data.copy()
        np.putmask(values, self._mask, True)
        result = values.all()

        if skipna:
            return result
        else:
            if not result or len(self) == 0 or not self._mask.any():
                return result
            else:
                return self.dtype.na_value
Code Example #34
File: orngLinProj.py Project: stefie10/slu_hri
    def optimize_SLOW_Separation(self,
                                 attrIndices,
                                 anchorData,
                                 XAnchors=None,
                                 YAnchors=None):
        if not self.graph.haveData or len(
                self.graph.rawData
        ) == 0 or not self.graph.dataHasDiscreteClass:
            return anchorData, (XAnchors, YAnchors)
        validData = self.graph.getValidList(attrIndices)
        selectedData = numpy.compress(validData,
                                      numpy.take(
                                          self.graph.noJitteringScaledData,
                                          attrIndices,
                                          axis=0),
                                      axis=1)

        if XAnchors == None:
            XAnchors = numpy.array([a[0] for a in anchorData], numpy.float)
        if YAnchors == None:
            YAnchors = numpy.array([a[1] for a in anchorData], numpy.float)

        transProjData = self.graph.createProjectionAsNumericArray(
            attrIndices,
            validData=validData,
            XAnchors=XAnchors,
            YAnchors=YAnchors,
            scaleFactor=self.graph.scaleFactor,
            normalize=self.graph.normalizeExamples,
            useAnchorData=1)
        if transProjData == None:
            return anchorData, (XAnchors, YAnchors)

        projData = numpy.transpose(transProjData)
        x_positions = projData[0]
        x_positions2 = numpy.array(x_positions)
        y_positions = projData[1]
        y_positions2 = numpy.array(y_positions)
        classData = projData[2]
        classData2 = numpy.array(classData)

        FXs = numpy.zeros(len(x_positions), numpy.float)  # forces
        FYs = numpy.zeros(len(x_positions), numpy.float)
        GXs = numpy.zeros(len(anchorData), numpy.float)  # gradients
        GYs = numpy.zeros(len(anchorData), numpy.float)

        rotateArray = range(len(x_positions))
        rotateArray = rotateArray[1:] + [0]
        for i in range(len(x_positions) - 1):
            x_positions2 = numpy.take(x_positions2, rotateArray)
            y_positions2 = numpy.take(y_positions2, rotateArray)
            classData2 = numpy.take(classData2, rotateArray)
            dx = x_positions2 - x_positions
            dy = y_positions2 - y_positions
            rs2 = dx**2 + dy**2
            rs2 += numpy.where(rs2 == 0.0, 0.0001,
                               0.0)  # replace zeros to avoid divisions by zero
            rs = numpy.sqrt(rs2)

            F = numpy.zeros(len(x_positions), numpy.float)
            classDiff = numpy.where(classData == classData2, 1, 0)
            numpy.putmask(F, classDiff, 150 * self.attractG * rs2)
            numpy.putmask(F, 1 - classDiff, -self.repelG / rs2)
            FXs += F * dx / rs
            FYs += F * dy / rs

        # compute gradient for all anchors
        GXs = numpy.array(
            [sum(FXs * selectedData[i]) for i in range(len(anchorData))],
            numpy.float)
        GYs = numpy.array(
            [sum(FYs * selectedData[i]) for i in range(len(anchorData))],
            numpy.float)

        m = max(max(abs(GXs)), max(abs(GYs)))
        GXs /= (20 * m)
        GYs /= (20 * m)

        newXAnchors = XAnchors + GXs
        newYAnchors = YAnchors + GYs

        # normalize so that the anchor most far away will lie on the circle
        m = math.sqrt(max(newXAnchors**2 + newYAnchors**2))
        newXAnchors /= m
        newYAnchors /= m
        return [(newXAnchors[i], newYAnchors[i], anchorData[i][2])
                for i in range(len(anchorData))], (newXAnchors, newYAnchors)
Code Example #35
File: orngLinProj.py Project: stefie10/slu_hri
    def optimize_LDA_Separation(self,
                                attrIndices,
                                anchorData,
                                XAnchors=None,
                                YAnchors=None):
        if not self.graph.haveData or len(
                self.graph.rawData
        ) == 0 or not self.graph.dataHasDiscreteClass:
            return anchorData, (XAnchors, YAnchors)
        classCount = len(self.graph.dataDomain.classVar.values)
        validData = self.graph.getValidList(attrIndices)
        selectedData = numpy.compress(validData,
                                      numpy.take(
                                          self.graph.noJitteringScaledData,
                                          attrIndices,
                                          axis=0),
                                      axis=1)

        if XAnchors == None:
            XAnchors = numpy.array([a[0] for a in anchorData], numpy.float)
        if YAnchors == None:
            YAnchors = numpy.array([a[1] for a in anchorData], numpy.float)

        transProjData = self.graph.createProjectionAsNumericArray(
            attrIndices,
            validData=validData,
            XAnchors=XAnchors,
            YAnchors=YAnchors,
            scaleFactor=self.graph.scaleFactor,
            normalize=self.graph.normalizeExamples,
            useAnchorData=1)
        if transProjData == None:
            return anchorData, (XAnchors, YAnchors)

        projData = numpy.transpose(transProjData)
        x_positions, y_positions, classData = projData[0], projData[
            1], projData[2]

        averages = []
        for i in range(classCount):
            ind = classData == i
            xpos = numpy.compress(ind, x_positions)
            ypos = numpy.compress(ind, y_positions)
            xave = numpy.sum(xpos) / len(xpos)
            yave = numpy.sum(ypos) / len(ypos)
            averages.append((xave, yave))

        # compute the positions of all the points. we will try to move all points so that the center will be in the (0,0)
        xCenterVector = -numpy.sum(x_positions) / len(x_positions)
        yCenterVector = -numpy.sum(y_positions) / len(y_positions)
        centerVectorLength = math.sqrt(xCenterVector * xCenterVector +
                                       yCenterVector * yCenterVector)

        meanDestinationVectors = []

        for i in range(classCount):
            xDir = 0.0
            yDir = 0.0
            rs = 0.0
            for j in range(classCount):
                if i == j: continue
                r = math.sqrt((averages[i][0] - averages[j][0])**2 +
                              (averages[i][1] - averages[j][1])**2)
                if r == 0.0:
                    xDir += math.cos((i / float(classCount)) * 2 * math.pi)
                    yDir += math.sin((i / float(classCount)) * 2 * math.pi)
                    r = 0.0001
                else:
                    xDir += (1 / r**3) * ((averages[i][0] - averages[j][0]))
                    yDir += (1 / r**3) * ((averages[i][1] - averages[j][1]))
                #rs += 1/r
            #actualDirAmpl = math.sqrt(xDir**2 + yDir**2)
            #s = abs(xDir)+abs(yDir)
            #xDir = rs * (xDir/s)
            #yDir = rs * (yDir/s)
            meanDestinationVectors.append((xDir, yDir))

        maxLength = math.sqrt(
            max([x**2 + y**2 for (x, y) in meanDestinationVectors]))
        meanDestinationVectors = [
            (x / (2 * maxLength), y / (2 * maxLength))
            for (x, y) in meanDestinationVectors
        ]  # normalize destination vectors to some normal values
        meanDestinationVectors = [
            (meanDestinationVectors[i][0] + averages[i][0],
             meanDestinationVectors[i][1] + averages[i][1])
            for i in range(len(meanDestinationVectors))
        ]  # add destination vectors to the class averages
        #meanDestinationVectors = [(x + xCenterVector/5, y + yCenterVector/5) for (x,y) in meanDestinationVectors]   # center mean values
        meanDestinationVectors = [(x + xCenterVector, y + yCenterVector)
                                  for (x, y) in meanDestinationVectors
                                  ]  # center mean values

        FXs = numpy.zeros(len(x_positions), numpy.float)  # forces
        FYs = numpy.zeros(len(x_positions), numpy.float)

        for c in range(classCount):
            ind = (classData == c)
            numpy.putmask(FXs, ind, meanDestinationVectors[c][0] - x_positions)
            numpy.putmask(FYs, ind, meanDestinationVectors[c][1] - y_positions)

        # compute gradient for all anchors
        GXs = numpy.array(
            [sum(FXs * selectedData[i]) for i in range(len(anchorData))],
            numpy.float)
        GYs = numpy.array(
            [sum(FYs * selectedData[i]) for i in range(len(anchorData))],
            numpy.float)

        m = max(max(abs(GXs)), max(abs(GYs)))
        GXs /= (20 * m)
        GYs /= (20 * m)

        newXAnchors = XAnchors + GXs
        newYAnchors = YAnchors + GYs

        # normalize so that the anchor most far away will lie on the circle
        m = math.sqrt(max(newXAnchors**2 + newYAnchors**2))
        newXAnchors /= m
        newYAnchors /= m

        #self.parentWidget.updateGraph()
        """
        for a in range(len(anchorData)):
            x = anchorData[a][0]; y = anchorData[a][1];
            self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, symbol = QwtSymbol.NoSymbol, xData = [x, x+GXs[a]], yData = [y, y+GYs[a]], forceFilledSymbols = 1, lineWidth=3)

        for i in range(classCount):
            self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, symbol = QwtSymbol.NoSymbol, xData = [averages[i][0], meanDestinationVectors[i][0]], yData = [averages[i][1], meanDestinationVectors[i][1]], forceFilledSymbols = 1, lineWidth=3)
            self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, xData = [averages[i][0], averages[i][0]], yData = [averages[i][1], averages[i][1]], forceFilledSymbols = 1, lineWidth=5)
        """
        #self.parentWidget.graph.repaint()
        #self.graph.anchorData = [(newXAnchors[i], newYAnchors[i], anchorData[i][2]) for i in range(len(anchorData))]
        #self.graph.updateData(attrs, 0)
        return [(newXAnchors[i], newYAnchors[i], anchorData[i][2])
                for i in range(len(anchorData))], (newXAnchors, newYAnchors)
Code Example #36
def lastrank(a, axis=-1):
    """
    The ranking of the last element along the axis, ignoring NaNs.

    The ranking is normalized to be between -1 and 1 instead of the more
    common 1 and N. The results are adjusted for ties.

    Parameters
    ----------
    a : ndarray
        Input array. If `a` is not an array, a conversion is attempted.
    axis : int, optional
        The axis over which to rank. By default (axis=-1) the ranking
        (and reducing) is performed over the last axis.

    Returns
    -------
    d : array
        In the case of, for example, a 2d array of shape (n, m) and
        axis=1, the output will contain the rank (normalized to be between
        -1 and 1 and adjusted for ties) of the last element of each row.
        The output in this example will have shape (n,).

    Examples
    --------
    Create an array:

    >>> y1 = larry([1, 2, 3])

    What is the rank of the last element (the value 3 in this example)?
    It is the largest element so the rank is 1.0:

    >>> import numpy as np
    >>> from la.afunc import lastrank
    >>> x1 = np.array([1, 2, 3])
    >>> lastrank(x1)
    1.0

    Now let's try an example where the last element has the smallest
    value:

    >>> x2 = np.array([3, 2, 1])
    >>> lastrank(x2)
    -1.0

    Here's an example where the last element is not the minimum or maximum
    value:

    >>> x3 = np.array([1, 3, 4, 5, 2])
    >>> lastrank(x3)
    -0.5

    """
    a = np.array(a, copy=False)
    ndim = a.ndim
    if a.size == 0:
        # At least one dimension has length 0
        shape = list(a.shape)
        shape.pop(axis)
        r = np.empty(shape, dtype=a.dtype)
        r.fill(np.nan)
        if (r.ndim == 0) and (r.size == 1):
            r = np.nan
        return r
    indlast = [slice(None)] * ndim
    indlast[axis] = slice(-1, None)
    indlast = tuple(indlast)
    indlast2 = [slice(None)] * ndim
    indlast2[axis] = -1
    indlast2 = tuple(indlast2)
    n = (~np.isnan(a)).sum(axis)
    a_indlast = a[indlast]
    g = (a_indlast > a).sum(axis)
    e = (a_indlast == a).sum(axis)
    r = (g + g + e - 1.0) / 2.0
    r = r / (n - 1.0)
    r = 2.0 * (r - 0.5)
    if ndim == 1:
        if n == 1:
            r = 0
        if np.isnan(a[indlast2]):  # elif?
            r = np.nan
    else:
        np.putmask(r, n == 1, 0)
        np.putmask(r, np.isnan(a[indlast2]), np.nan)
    return r
Code Example #37
File: mdv_common.py Project: chrisgump/pyart
    def read_a_field(self, fnum, debug=False):
        """
        Read a field from the MDV file.

        Parameters
        ----------
        fnum : int
            Field number to read.
        debug : bool
            True to print debugging information, False to supress.

        Returns
        -------
        field_data : array
            Field data.  This data is also stored as a object attribute under
            the field name.

        See Also
        --------
        read_all_fields : Read all fields in the MDV file.

        """

        field_header = self.field_headers[fnum]
        # if the field has already been read, return it
        if self.fields_data[fnum] is not None:
            if debug:
                print("Getting data from the object.")
            return self.fields_data[fnum]

        # field has not yet been read, populate the object and return
        if debug:
            print("No data found in object, populating")

        nz = field_header['nz']
        ny = field_header['ny']
        nx = field_header['nx']

        # read the header
        field_data = np.zeros([nz, ny, nx], dtype='float32')
        self.fileptr.seek(field_header['field_data_offset'])
        self._get_levels_info(nz)  # dict not used, but need to seek.

        for sw in range(nz):
            if debug:
                print("doing levels ", sw)

            # get the compressed level data
            compr_info = self._get_compression_info()
            if compr_info['magic_cookie'] == 0xfe0103fd:
                # Run length encoding only has 20 bytes of compression
                # information with slightly different order, back up
                # 4 bytes to read all of the compressed data.
                self.fileptr.seek(-4, 1)
                compr_data = self.fileptr.read(compr_info['spare'][0])
            else:
                compr_data = self.fileptr.read(compr_info['nbytes_coded'])
            encoding_type = field_header['encoding_type']
            if encoding_type == ENCODING_INT8:
                fmt = '>%iB' % (nx * ny)
                np_form = '>B'
            elif encoding_type == ENCODING_INT16:
                fmt = '>%iH' % (nx * ny)
                np_form = '>H'
            elif encoding_type == ENCODING_FLOAT32:
                fmt = '>%if' % (nx * ny)
                np_form = '>f'
            else:
                raise NotImplementedError('encoding: ', encoding_type)

            # decompress the level data
            if compr_info['magic_cookie'] == 0xf7f7f7f7:
                cd_fobj = BytesIO(compr_data)
                gzip_file_handle = gzip.GzipFile(fileobj=cd_fobj)
                decompr_data = gzip_file_handle.read(struct.calcsize(fmt))
                gzip_file_handle.close()
            elif compr_info['magic_cookie'] == 0xf5f5f5f5:
                decompr_data = zlib.decompress(compr_data)
            elif compr_info['magic_cookie'] == 0xf6f6f6f6:
                # ZLIB_NOT_COMPRESSED
                decompr_data = compr_data
            elif compr_info['magic_cookie'] == 0xfe0103fd:
                # Run length encoding of 8-bit data
                # Compression info is in a different order, namely
                # int32 : RL8_FLAG (0xfe0103fd)
                # int32 : key
                # int32 : nbytes_array (bytes of encoded data with header)
                # int32 : nbytes_full (bytes of unencoded data, no header)
                # int32 : nbytes_coded (bytes of encoded data, no header)
                key = compr_info['nbytes_uncompressed']
                decompr_size = compr_info['nbytes_coded']
                decompr_data = _decode_rle8(compr_data, key, decompr_size)
            else:
                raise NotImplementedError('unsupported compression mode')
                # With sample data it should be possible to write
                # decompressor for other modes, the compression magic
                # cookies for these modes are:
                # 0x2f2f2f2f : TA_NOT_COMPRESSED
                # 0xf8f8f8f8 : GZIP_NOT_COMPRESSED
                # 0xf3f3f3f3 : BZIP_COMPRESSED
                # 0xf4f4f4f4 : BZIP_NOT_COMPRESSED

            # read the decompressed data, reshape and mask
            sw_data = np.fromstring(decompr_data, np_form).astype('float32')
            sw_data.shape = (ny, nx)
            mask = sw_data == field_header['bad_data_value']
            np.putmask(sw_data, mask, [np.NaN])

            # scale and offset the data, store in field_data
            scale = field_header['scale']
            bias = field_header['bias']
            field_data[sw, :, :] = sw_data * scale + bias

        # store data as object attribute and return
        self.fields_data[fnum] = field_data
        return field_data
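As a rough illustration of the final masking and scale/offset step, here is a toy sketch; the sentinel, scale and bias values below are made up, not taken from any real MDV file:

import numpy as np

sw_data = np.array([[10., 20., -999.],
                    [30., -999., 40.]], dtype='float32')
bad_data_value = -999.0                       # hypothetical sentinel
np.putmask(sw_data, sw_data == bad_data_value, np.nan)
scale, bias = 0.5, 100.0                      # hypothetical scaling
print(sw_data * scale + bias)                 # NaNs propagate through the scaling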
コード例 #38
0
    def replace_nans(self, orig, filtered_values):
        new = orig.copy()
        np.putmask(new, np.isnan(new), filtered_values)
        return new.data
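A self-contained sketch of what the helper does (in the original the trailing `.data` suggests `orig` is a masked array; a plain ndarray is used here to keep it short):

import numpy as np

orig = np.array([1.0, np.nan, 3.0, np.nan])
filtered = np.array([9.0, 8.0, 7.0, 6.0])
new = orig.copy()
np.putmask(new, np.isnan(new), filtered)   # NaN slots take the corresponding
print(new)                                 # filtered values -> [1. 8. 3. 6.]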
コード例 #39
0
def nanskew(values, axis=None, skipna=True, mask=None):
    """ Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    Parameters
    ----------
    values : ndarray
    axis: int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1,np.nan, 1, 2])
    >>> nanops.nanskew(s)
    1.7320508075688787
    """
    values = lib.values_from_object(values)
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follows this behavior
    # to fix the fperr by treating m2 < 1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = (count * (count - 1)**0.5 / (count - 2)) * (m3 / m2**1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result
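A quick plain-NumPy cross-check of the G1 formula on NaN-free data (so the mask handling drops out); it reproduces the value from the docstring example above:

import numpy as np

x = np.array([1.0, 1.0, 2.0])
n = x.size
d = x - x.mean()
m2 = (d ** 2).sum()
m3 = (d ** 3).sum()
g1 = (n * (n - 1) ** 0.5 / (n - 2)) * m3 / m2 ** 1.5
print(g1)   # 1.7320508075688...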
コード例 #40
0
ファイル: numpy.py プロジェクト: oleks/distributed-hypothesis
    def do_draw(self, data):
        if 0 in self.shape:
            return np.zeros(dtype=self.dtype, shape=self.shape)

        # Reset this flag for each test case to emit warnings from set_element
        self._report_overflow = True

        # This could legitimately be a np.empty, but the performance gains for
        # that would be so marginal that there's really not much point risking
        # undefined behaviour shenanigans.
        result = np.zeros(shape=self.array_size, dtype=self.dtype)

        if self.fill.is_empty:
            # We have no fill value (either because the user explicitly
            # disabled it or because the default behaviour was used and our
            # elements strategy does not produce reusable values), so we must
            # generate a fully dense array with a freshly drawn value for each
            # entry.
            if self.unique:
                seen = set()
                elements = cu.many(data,
                                   min_size=self.array_size,
                                   max_size=self.array_size,
                                   average_size=self.array_size)
                i = 0
                while elements.more():
                    # We assign first because this means we check for
                    # uniqueness after numpy has converted it to the relevant
                    # type for us. Because we don't increment the counter on
                    # a duplicate we will overwrite it on the next draw.
                    self.set_element(data, result, i)
                    if result[i] not in seen:
                        seen.add(result[i])
                        i += 1
                    else:
                        elements.reject()
            else:
                for i in hrange(len(result)):
                    self.set_element(data, result, i)
        else:
            # We draw numpy arrays as "sparse with an offset". We draw a
            # collection of index assignments within the array and assign
            # fresh values from our elements strategy to those indices. If at
            # the end we have not assigned every element then we draw a single
            # value from our fill strategy and use that to populate the
            # remaining positions with that strategy.

            elements = cu.many(
                data,
                min_size=0,
                max_size=self.array_size,
                # sqrt isn't chosen for any particularly principled reason. It
                # just grows reasonably quickly but sublinearly, and for small
                # arrays it represents a decent fraction of the array size.
                average_size=math.sqrt(self.array_size),
            )

            needs_fill = np.full(self.array_size, True)
            seen = set()

            while elements.more():
                i = cu.integer_range(data, 0, self.array_size - 1)
                if not needs_fill[i]:
                    elements.reject()
                    continue
                self.set_element(data, result, i)
                if self.unique:
                    if result[i] in seen:
                        elements.reject()
                        continue
                    else:
                        seen.add(result[i])
                needs_fill[i] = False
            if needs_fill.any():
                # We didn't fill all of the indices in the early loop, so we
                # put a fill value into the rest.

                # We have to do this hilarious little song and dance to work
                # around numpy's special handling of iterable values. If the
                # value here were e.g. a tuple then neither array creation
                # nor putmask would do the right thing. But by creating an
                # array of size one and then assigning the fill value as a
                # single element, we both get an array with the right value in
                # it and putmask will do the right thing by repeating the
                # values of the array across the mask.
                one_element = np.zeros(shape=1, dtype=self.dtype)
                self.set_element(data, one_element, 0, self.fill)
                fill_value = one_element[0]
                if self.unique:
                    try:
                        is_nan = np.isnan(fill_value)
                    except TypeError:
                        is_nan = False

                    if not is_nan:
                        raise InvalidArgument(
                            'Cannot fill unique array with non-NaN '
                            'value %r' % (fill_value, ))

                np.putmask(result, needs_fill, one_element)

        return result.reshape(self.shape)
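The one-element-array trick above works because np.putmask repeats the values array across every masked position; a minimal standalone demonstration:

import numpy as np

result = np.zeros(6)
needs_fill = np.array([True, False, True, True, False, True])
one_element = np.array([7.5])                  # fill value wrapped in a size-1 array
np.putmask(result, needs_fill, one_element)    # 7.5 is repeated across the mask
print(result)                                  # [7.5 0.  7.5 7.5 0.  7.5]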
コード例 #41
0
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: Optional[str] = None,
    mask: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : dtype
        dtype for values
    dtype_max : dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """

    # In _get_values is only called from within nanops, and in all cases
    #  with scalar fill_value.  This guarantee is important for the
    #  maybe_upcast_putmask call below
    assert is_scalar(fill_value)

    mask = _maybe_get_mask(values, skipna, mask)

    if is_datetime64tz_dtype(values):
        # lib.values_from_object returns M8[ns] dtype instead of tz-aware,
        #  so this case must be handled separately from the rest
        dtype = values.dtype
        values = getattr(values, "_values", values)
    else:
        values = lib.values_from_object(values)
        dtype = values.dtype

    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
        # changing timedelta64/datetime64 to int64 needs to happen after
        #  finding `mask` above
        values = getattr(values, "asi8", values)
        values = values.view(np.int64)

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype,
                                 fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    copy = (mask is not None) and (fill_value is not None)

    if skipna and copy:
        values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, _ = maybe_upcast_putmask(values, mask, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max, fill_value
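A toy sketch of the fill step for a float array, assuming a '+inf' fill value as would be chosen for a min-type reduction:

import numpy as np

values = np.array([3.0, np.nan, 1.0, np.nan])
mask = np.isnan(values)
fill_value = np.inf                  # '+inf' so NaNs can never win a min()
values = values.copy()
np.putmask(values, mask, fill_value)
print(values.min())                  # 1.0 -- the NaNs no longer poison the reduction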
コード例 #42
0
def _bins_to_cuts(
    x,
    bins,
    right: bool = True,
    labels=None,
    precision: int = 3,
    include_lowest: bool = False,
    dtype=None,
    duplicates: str = "raise",
    ordered: bool = True,
):
    if not ordered and labels is None:
        raise ValueError("'labels' must be provided if 'ordered = False'")

    if duplicates not in ["raise", "drop"]:
        raise ValueError(
            "invalid value for 'duplicates' parameter, valid options are: raise, drop"
        )

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = Categorical.from_codes(ids, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == "raise":
            raise ValueError(
                f"Bin edges must be unique: {repr(bins)}.\n"
                f"You can drop duplicate edges by setting the 'duplicates' kwarg"
            )
        else:
            bins = unique_bins

    side = "left" if right else "right"
    ids = ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        ids[x == bins[0]] = 1

    na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if not (labels is None or is_list_like(labels)):
            raise ValueError(
                "Bin labels must either be False, None or passed in as a "
                "list-like argument")

        elif labels is None:
            labels = _format_labels(bins,
                                    precision,
                                    right=right,
                                    include_lowest=include_lowest,
                                    dtype=dtype)
        elif ordered and len(set(labels)) != len(labels):
            raise ValueError(
                "labels must be unique if ordered=True; pass ordered=False for duplicate labels"  # noqa
            )
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError(
                    "Bin labels must be one fewer than the number of bin edges"
                )
        if not is_categorical_dtype(labels):
            labels = Categorical(
                labels,
                categories=labels if len(set(labels)) == len(labels) else None,
                ordered=ordered,
            )
        # TODO: handle mismatch between categorical label order and pandas.cut order.
        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
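A small sketch of the core binning step on toy data, roughly following the `labels is False` path above:

import numpy as np

bins = np.array([0.0, 1.0, 2.0, 3.0])
x = np.array([0.5, 2.5, np.nan, -1.0])
ids = bins.searchsorted(x, side="left")        # [1 3 4 0]
na_mask = np.isnan(x) | (ids == len(bins)) | (ids == 0)
result = (ids - 1).astype(np.float64)
np.putmask(result, na_mask, np.nan)            # NaN and out-of-range rows -> NaN
print(result)                                  # [ 0.  2. nan nan]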
コード例 #43
0
def _highly_variable_genes_seurat_v3(
    adata: AnnData,
    layer: Optional[str] = None,
    n_top_genes: int = 2000,
    batch_key: Optional[str] = None,
    check_values: bool = True,
    span: float = 0.3,
    subset: bool = False,
    inplace: bool = True,
) -> Optional[pd.DataFrame]:
    """\
    See `highly_variable_genes`.

    For further implementation details see https://www.overleaf.com/read/ckptrbgzzzpg

    Returns
    -------
    Depending on `inplace` returns calculated metrics (:class:`~pd.DataFrame`) or
    updates `.var` with the following fields

    highly_variable : bool
        boolean indicator of highly-variable genes
    **means**
        means per gene
    **variances**
        variance per gene
    **variances_norm**
        normalized variance per gene, averaged in the case of multiple batches
    highly_variable_rank : float
        Rank of the gene according to normalized variance, median rank in the case of multiple batches
    highly_variable_nbatches : int
        If batch_key is given, this denotes in how many batches genes are detected as HVG
    """

    try:
        from skmisc.loess import loess
    except ImportError:
        raise ImportError(
            'Please install the skmisc package via `pip install --user scikit-misc`'
        )

    X = adata.layers[layer] if layer is not None else adata.X
    if check_values and not check_nonnegative_integers(X):
        warnings.warn(
            "`flavor='seurat_v3'` expects raw count data, but non-integers were found.",
            UserWarning,
        )

    if batch_key is None:
        batch_info = pd.Categorical(np.zeros(adata.shape[0], dtype=int))
    else:
        batch_info = adata.obs[batch_key].values

    norm_gene_vars = []
    for b in np.unique(batch_info):

        ad = adata[batch_info == b]
        X = ad.layers[layer] if layer is not None else ad.X

        mean, var = _get_mean_var(X)
        not_const = var > 0
        estimat_var = np.zeros(adata.shape[1], dtype=np.float64)

        y = np.log10(var[not_const])
        x = np.log10(mean[not_const])
        model = loess(x, y, span=span, degree=2)
        model.fit()
        estimat_var[not_const] = model.outputs.fitted_values
        reg_std = np.sqrt(10**estimat_var)

        batch_counts = X.astype(np.float64).copy()
        # clip large values as in Seurat
        N = np.sum(batch_info == b)
        vmax = np.sqrt(N)
        clip_val = reg_std * vmax + mean
        if sp_sparse.issparse(batch_counts):
            batch_counts = sp_sparse.csr_matrix(batch_counts)
            mask = batch_counts.data > clip_val[batch_counts.indices]
            batch_counts.data[mask] = clip_val[batch_counts.indices[mask]]
        else:
            clip_val_broad = np.broadcast_to(clip_val, batch_counts.shape)
            np.putmask(
                batch_counts,
                batch_counts > clip_val_broad,
                clip_val_broad,
            )

        if sp_sparse.issparse(batch_counts):
            squared_batch_counts_sum = np.array(
                batch_counts.power(2).sum(axis=0))
            batch_counts_sum = np.array(batch_counts.sum(axis=0))
        else:
            squared_batch_counts_sum = np.square(batch_counts).sum(axis=0)
            batch_counts_sum = batch_counts.sum(axis=0)

        norm_gene_var = (1 / ((N - 1) * np.square(reg_std))) * (
            (N * np.square(mean)) + squared_batch_counts_sum -
            2 * batch_counts_sum * mean)
        norm_gene_vars.append(norm_gene_var.reshape(1, -1))

    norm_gene_vars = np.concatenate(norm_gene_vars, axis=0)
    # argsort twice gives ranks, small rank means most variable
    ranked_norm_gene_vars = np.argsort(np.argsort(-norm_gene_vars, axis=1),
                                       axis=1)

    # this is done in SelectIntegrationFeatures() in Seurat v3
    ranked_norm_gene_vars = ranked_norm_gene_vars.astype(np.float32)
    num_batches_high_var = np.sum(
        (ranked_norm_gene_vars < n_top_genes).astype(int), axis=0)
    ranked_norm_gene_vars[ranked_norm_gene_vars >= n_top_genes] = np.nan
    ma_ranked = np.ma.masked_invalid(ranked_norm_gene_vars)
    median_ranked = np.ma.median(ma_ranked, axis=0).filled(np.nan)

    df = pd.DataFrame(index=np.array(adata.var_names))
    df['highly_variable_nbatches'] = num_batches_high_var
    df['highly_variable_rank'] = median_ranked
    df['variances_norm'] = np.mean(norm_gene_vars, axis=0)
    df['means'] = mean
    df['variances'] = var

    df.sort_values(
        ['highly_variable_rank', 'highly_variable_nbatches'],
        ascending=[True, False],
        na_position='last',
        inplace=True,
    )
    df['highly_variable'] = False
    df.loc[:int(n_top_genes), 'highly_variable'] = True
    df = df.loc[adata.var_names]

    if inplace or subset:
        adata.uns['hvg'] = {'flavor': 'seurat_v3'}
        logg.hint('added\n'
                  '    \'highly_variable\', boolean vector (adata.var)\n'
                  '    \'highly_variable_rank\', float vector (adata.var)\n'
                  '    \'means\', float vector (adata.var)\n'
                  '    \'variances\', float vector (adata.var)\n'
                  '    \'variances_norm\', float vector (adata.var)')
        adata.var['highly_variable'] = df['highly_variable'].values
        adata.var['highly_variable_rank'] = df['highly_variable_rank'].values
        adata.var['means'] = df['means'].values
        adata.var['variances'] = df['variances'].values
        adata.var['variances_norm'] = df['variances_norm'].values.astype(
            'float64', copy=False)
        if batch_key is not None:
            adata.var['highly_variable_nbatches'] = df[
                'highly_variable_nbatches'].values
        if subset:
            adata._inplace_subset_var(df['highly_variable'].values)
    else:
        if batch_key is None:
            df = df.drop(['highly_variable_nbatches'], axis=1)
        return df
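A toy sketch (made-up numbers, not Seurat defaults) of the dense-matrix clipping step, where each gene's counts are capped at a per-gene clip value via np.putmask:

import numpy as np

batch_counts = np.array([[1.0, 50.0],
                         [2.0,  3.0],
                         [9.0,  4.0]])
clip_val = np.array([5.0, 10.0])                       # one clip value per gene (column)
clip_broad = np.broadcast_to(clip_val, batch_counts.shape)
np.putmask(batch_counts, batch_counts > clip_broad, clip_broad)
print(batch_counts)                                    # [[1. 10.] [2. 3.] [5. 4.]]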
コード例 #44
0
def nanvar(values, axis=None, skipna=True, ddof=1, mask=None):
    """
    Compute the variance along given axis while ignoring NaNs

    Parameters
    ----------
    values : ndarray
    axis: int, optional
    skipna : bool, default True
    ddof : int, default 1
        Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 2, 3])
    >>> nanops.nanvar(s)
    1.0
    """
    values = lib.values_from_object(values)
    dtype = values.dtype
    mask = _maybe_get_mask(values, skipna, mask)
    if is_any_int_dtype(values):
        values = values.astype("f8")
        if mask is not None:
            values[mask] = np.nan

    if is_float_dtype(values):
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof,
                                      values.dtype)
    else:
        count, d = _get_counts_nanvar(values.shape, mask, axis, ddof)

    if skipna and mask is not None:
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)
    sqr = _ensure_numeric((avg - values)**2)
    if mask is not None:
        np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype)
    return _wrap_results(result, values.dtype)
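A plain-NumPy sketch of the two-pass variance with ddof=1 on NaN-free data; it matches the docstring example once the NaN is dropped:

import numpy as np

x = np.array([1.0, 2.0, 3.0])
avg = x.sum() / x.size              # first pass: mean
sqr = (avg - x) ** 2                # second pass: squared deviations
print(sqr.sum() / (x.size - 1))     # 1.0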
コード例 #45
0
ファイル: lcdnorm4.py プロジェクト: kashif181/live_detect
def lcdnorm4(arr_in,
             neighborhood,
             contrast=DEFAULT_CONTRAST,
             divisive=DEFAULT_DIVISIVE,
             stretch=DEFAULT_STRETCH,
             threshold=DEFAULT_THRESHOLD,
             stride=DEFAULT_STRIDE,
             arr_out=None):
    """4D Local Contrast Divisive Normalization

    XXX: docstring
    """

    assert arr_in.ndim == 4
    assert len(neighborhood) == 2
    assert isinstance(contrast, bool)
    assert isinstance(divisive, bool)
    assert contrast or divisive

    in_imgs, inh, inw, ind = arr_in.shape

    nbh, nbw = neighborhood
    assert nbh <= inh
    assert nbw <= inw

    nb_size = 1. * nbh * nbw * ind

    if arr_out is not None:
        assert arr_out.dtype == arr_in.dtype
        assert arr_out.shape == (in_imgs, 1 + (inh - nbh) / stride,
                                 1 + (inw - nbw) / stride, ind)

    # -- prepare arr_out
    lys = nbh // 2
    lxs = nbw // 2
    rys = (nbh - 1) // 2
    rxs = (nbw - 1) // 2
    _arr_out = arr_in[:, lys:inh - rys, lxs:inw - rxs][::stride, ::stride]

    # -- Contrast Normalization
    if contrast:

        # -- local sums
        arr_sum = arr_in.sum(-1)
        arr_sum = view_as_windows(arr_sum, (1, 1, nbw)).sum(-1)[:, :, ::stride,
                                                                0, 0]
        arr_sum = view_as_windows(arr_sum, (1, nbh, 1)).sum(-2)[:, ::stride, :,
                                                                0]

        # -- remove the mean
        _arr_out = _arr_out - arr_sum / nb_size

    # -- Divisive (gain) Normalization
    if divisive:

        # -- local sums of squares
        arr_ssq = (arr_in**2.0).sum(-1)
        arr_ssq = view_as_windows(arr_ssq, (1, 1, nbw)).sum(-1)[:, :, ::stride,
                                                                0, 0]
        arr_ssq = view_as_windows(arr_ssq, (1, nbh, 1)).sum(-2)[:, ::stride, :,
                                                                0]

        # -- divide by the euclidean norm
        if contrast:
            l2norms = (arr_ssq - (arr_sum**2.0) / nb_size)
        else:
            l2norms = arr_ssq

        np.putmask(l2norms, l2norms < 0., 0.)
        l2norms = np.sqrt(l2norms) + EPSILON

        if stretch != 1:
            _arr_out *= stretch
            l2norms *= stretch

        np.putmask(l2norms, l2norms < (threshold + EPSILON), 1.0)

        _arr_out = _arr_out / l2norms

    if arr_out is not None:
        arr_out[:] = _arr_out
    else:
        arr_out = _arr_out

    assert arr_out.shape[0] == in_imgs

    return arr_out
コード例 #46
0
def nankurt(values, axis=None, skipna=True, mask=None):
    """
    Compute the sample excess kurtosis

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    Parameters
    ----------
    values : ndarray
    axis: int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1,np.nan, 1, 3, 2])
    >>> nanops.nankurt(s)
    -1.2892561983471076
    """
    values = lib.values_from_object(values)
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted4 = adjusted2**2
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        adj = 3 * (count - 1)**2 / ((count - 2) * (count - 3))
        numer = count * (count + 1) * (count - 1) * m4
        denom = (count - 2) * (count - 3) * m2**2

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_kurt follows this behavior
    # to fix the fperr by treating denom < 1e-14 as zero
    numer = _zero_out_fperr(numer)
    denom = _zero_out_fperr(denom)

    if not isinstance(denom, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denom == 0:
            return 0

    with np.errstate(invalid="ignore", divide="ignore"):
        result = numer / denom - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(denom == 0, 0, result)
        result[count < 4] = np.nan

    return result
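A plain-NumPy cross-check of the G2 formula on the docstring data with the NaN dropped by hand:

import numpy as np

x = np.array([1.0, 1.0, 3.0, 2.0])
n = x.size
d = x - x.mean()
m2, m4 = (d ** 2).sum(), (d ** 4).sum()
adj = 3 * (n - 1) ** 2 / ((n - 2) * (n - 3))
g2 = n * (n + 1) * (n - 1) * m4 / ((n - 2) * (n - 3) * m2 ** 2) - adj
print(g2)   # -1.2892561983471...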
コード例 #47
0
ファイル: test_filter.py プロジェクト: wenzheli/zipline
    def test_percentile_between(self):

        quintiles = range(5)
        filter_names = ['pct_' + str(q) for q in quintiles]
        iter_quintiles = list(zip(filter_names, quintiles))
        terms = {
            name: self.f.percentile_between(q * 20.0, (q + 1) * 20.0)
            for name, q in iter_quintiles
        }

        # Test with 5 columns and no NaNs.
        eye5 = eye(5, dtype=float64)
        expected = {}
        for name, quintile in iter_quintiles:
            if quintile < 4:
                # There are four 0s and one 1 in each row, so the first 4
                # quintiles should be all the locations with zeros in the input
                # array.
                expected[name] = ~eye5.astype(bool)
            else:
                # The top quintile should match the sole 1 in each row.
                expected[name] = eye5.astype(bool)

        self.check_terms(
            terms=terms,
            expected=expected,
            initial_workspace={self.f: eye5},
            mask=self.build_mask(ones((5, 5))),
        )

        # Test with 6 columns, no NaNs, and one masked entry per day.
        eye6 = eye(6, dtype=float64)
        mask = array(
            [[1, 1, 1, 1, 1, 0], [0, 1, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1],
             [1, 1, 0, 1, 1, 1], [1, 1, 1, 0, 1, 1], [1, 1, 1, 1, 0, 1]],
            dtype=bool)
        expected = {}
        for name, quintile in iter_quintiles:
            if quintile < 4:
                # Should keep all values that were 0 in the base data and were
                # 1 in the mask.
                expected[name] = mask & ~eye6.astype(bool)
            else:
                # The top quintile should match the sole 1 in each row.
                expected[name] = eye6.astype(bool)

        self.check_terms(
            terms=terms,
            expected=expected,
            initial_workspace={self.f: eye6},
            mask=self.build_mask(mask),
        )

        # Test with 6 columns, no mask, and one NaN per day.  Should have the
        # same outcome as if we had masked the NaNs.
        # In particular, the NaNs should never pass any filters.
        eye6_withnans = eye6.copy()
        putmask(eye6_withnans, ~mask, nan)
        expected = {}
        for name, quintile in iter_quintiles:
            if quintile < 4:
                # Should keep all values that were 0 in the base data and were
                # 1 in the mask.
                expected[name] = mask & (~eye6.astype(bool))
            else:
                # Should keep all the 1s in the base data.
                expected[name] = eye6.astype(bool)

        self.check_terms(
            terms,
            expected,
            initial_workspace={self.f: eye6},
            mask=self.build_mask(mask),
        )
コード例 #48
0
ファイル: plot.py プロジェクト: andrei-v-frolov/caustics
        q = -np.log(1.0 / np.square(J_pic) + 1.0e-16) / 2.0
        break
    if case("lnF(a)"):
        q = -np.log(1.0 / np.square(J_a) + 1.0e-16) / 2.0
        break
    if case("lnF(p)"):
        q = -np.log(1.0 / np.square(J_p) + 1.0e-16) / 2.0
        break
    if case("entropy"):
        q = np.cumsum(S, axis=0)
        break
    if case("lyapunov"):
        q = S
        break

np.putmask(q, np.isnan(q), 0.0)
np.putmask(q, np.isinf(q), 0.0)

ny, nx = q.shape

# antialias using median filter
osy = ny // 500
osx = nx // 1000

if (osy != 1 or osx != 1):
    aakernel = [(osy & (~1)) + 1, (osx & (~1)) + 1]
    q = medfilt(q, aakernel)[::osy, ::osx]
    ny, nx = q.shape
    print("Antialiased with median kernel %s; output size is (%i,%i) pixels" %
          (aakernel, ny, nx))
コード例 #49
0
def v1like_norm(hin, conv_mode, kshape, threshold):
    """ V1LIKE local normalization

    Each pixel in the input image is divisively normalized by the L2 norm
    of the pixels in a local neighborhood around it, and the result of this
    division is placed in the output image.

    Inputs:
      hin -- a 3-dimensional array (width X height X rgb)
      kshape -- kernel shape (tuple) ex: (3,3) for a 3x3 normalization
                neighborhood
      threshold -- magnitude threshold, if the vector's length is below
                   it doesn't get resized ex: 1.

    Outputs:
      hout -- a normalized 3-dimensional array (width X height X rgb)

    """
    eps = 1e-5
    kh, kw = kshape
    dtype = hin.dtype
    hsrc = hin[:].copy()

    # -- prepare hout
    hin_h, hin_w, hin_d = hin.shape
    hout_h = hin_h  # - kh + 1
    hout_w = hin_w  # - kw + 1

    if conv_mode != "same":
        hout_h = hout_h - kh + 1
        hout_w = hout_w - kw + 1

    hout_d = hin_d
    hout = np.empty((hout_h, hout_w, hout_d), 'float32')

    # -- compute numerator (hnum) and divisor (hdiv)
    # sum kernel
    hin_d = hin.shape[-1]
    kshape3d = list(kshape) + [hin_d]
    ker = np.ones(kshape3d, dtype=dtype)
    size = ker.size

    # compute sum-of-square
    hsq = hsrc**2.
    #hssq = conv(hsq, ker, conv_mode).astype(dtype)
    kerH = ker[:, 0, 0][:, None]  #, None]
    kerW = ker[0, :, 0][None, :]  #, None]
    kerD = ker[0, 0, :][None, None, :]

    hssq = conv(
        conv(conv(hsq, kerD, 'valid')[:, :, 0].astype(dtype), kerW, conv_mode),
        kerH, conv_mode).astype(dtype)
    hssq = hssq[:, :, None]

    # compute hnum and hdiv
    ys = kh // 2
    xs = kw // 2
    hout_h, hout_w, hout_d = hout.shape[-3:]
    hs = hout_h
    ws = hout_w

    hsum = conv(
        conv(
            conv(hsrc, kerD, 'valid')[:, :, 0].astype(dtype), kerW, conv_mode),
        kerH, conv_mode).astype(dtype)

    hsum = hsum[:, :, None]
    if conv_mode == 'same':
        hnum = hsrc - (hsum / size)
    else:
        hnum = hsrc[ys:ys + hs, xs:xs + ws] - (hsum / size)
    val = (hssq - (hsum**2.) / size)
    val[val < 0] = 0
    hdiv = val**(1. / 2) + eps

    # -- apply normalization
    # 'volume' threshold
    np.putmask(hdiv, hdiv < (threshold + eps), 1.)
    result = (hnum / hdiv)

    #print result.shape
    hout[:] = result
    #print hout.shape, hout.dtype
    return hout
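A 1-d toy sketch of the thresholded division described in the docstring: positions whose local norm falls below the threshold are divided by 1, i.e. left unresized:

import numpy as np

eps = 1e-5
threshold = 1.0
hnum = np.array([0.3, 4.0, -3.0])
hdiv = np.array([0.5, 5.0, 3.0])        # pretend local L2 norms
np.putmask(hdiv, hdiv < (threshold + eps), 1.0)
print(hnum / hdiv)                      # [ 0.3  0.8 -1. ]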
コード例 #50
0
    now = time.time()

    out.fill(0)

    grid(out, (0, 0.5, 1), size=1, n=10)
    frustum(out, depth_intrinsics)
    axes(out, view([0, 0, 0]), state.rotation, size=0.1, thickness=1)

    if not state.scale or out.shape[:2] == (h, w):
        pointcloud(out, verts, texcoords, color_source)
    else:
        tmp = np.zeros((h, w, 3), dtype=np.uint8)
        pointcloud(tmp, verts, texcoords, color_source)
        tmp = cv2.resize(
            tmp, out.shape[:2][::-1], interpolation=cv2.INTER_NEAREST)
        np.putmask(out, tmp > 0, tmp)

    if any(state.mouse_btns):
        axes(out, view(state.pivot), state.rotation, thickness=4)

    dt = time.time() - now

    cv2.setWindowTitle(
        state.WIN_NAME, "RealSense (%dx%d) %dFPS (%.2fms) %s" %
        (w, h, 1.0/dt, dt*1000, "PAUSED" if state.paused else ""))

    cv2.imshow(state.WIN_NAME, out)
    key = cv2.waitKey(1)

    if key == ord("r"):
        state.reset()
コード例 #51
0
ファイル: nanops.py プロジェクト: frreiss/pandas-fred
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: Optional[str] = None,
    mask: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : np.dtype
        dtype for values
    dtype_max : np.dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # In _get_values is only called from within nanops, and in all cases
    #  with scalar fill_value.  This guarantee is important for the
    #  np.where call below
    assert is_scalar(fill_value)
    values = extract_array(values, extract_numpy=True)

    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    datetimelike = False
    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        #  finding `mask` above
        values = np.asarray(values.view("i8"))
        datetimelike = True

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype,
                                 fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna and (mask is not None) and (fill_value is not None):
        if mask.any():
            if dtype_ok or datetimelike:
                values = values.copy()
                np.putmask(values, mask, fill_value)
            else:
                # np.where will promote if needed
                values = np.where(~mask, values, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.dtype(np.int64)
    elif is_float_dtype(dtype):
        dtype_max = np.dtype(np.float64)

    return values, mask, dtype, dtype_max, fill_value
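For integer input an in-place NaN fill is impossible, which is why the np.where branch above promotes instead of using putmask; a minimal sketch:

import numpy as np

values = np.array([3, 7, 5], dtype=np.int64)
mask = np.array([False, True, False])
filled = np.where(~mask, values, np.nan)   # promotes to float64
print(filled, filled.dtype)                # [ 3. nan  5.] float64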
コード例 #52
0
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    order : deprecated
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """
    if order is not None:
        msg = "order is deprecated. See " \
              "https://github.com/pydata/pandas/issues/6926"
        warn(msg, FutureWarning, stacklevel=2)

    from pandas import Index, Series, DatetimeIndex

    vals = np.asarray(values)

    # localize to UTC
    is_datetimetz = com.is_datetimetz(values)
    if is_datetimetz:
        values = DatetimeIndex(values)
        vals = values.tz_localize(None)

    is_datetime = com.is_datetime64_dtype(vals)
    is_timedelta = com.is_timedelta64_dtype(vals)
    (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)

    table = hash_klass(size_hint or len(vals))
    uniques = vec_klass()
    labels = table.get_labels(vals, uniques, 0, na_sentinel, True)

    labels = com._ensure_platform_int(labels)

    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        try:
            sorter = uniques.argsort()
        except:
            # unorderable in py3 if mixed str/int
            t = hash_klass(len(uniques))
            t.map_locations(com._ensure_object(uniques))

            # order ints before strings
            ordered = np.concatenate([
                np.sort(np.array([e for i, e in enumerate(uniques) if f(e)],
                                 dtype=object)) for f in
                [lambda x: not isinstance(x, string_types),
                 lambda x: isinstance(x, string_types)]])
            sorter = com._ensure_platform_int(t.lookup(
                com._ensure_object(ordered)))

        reverse_indexer = np.empty(len(sorter), dtype=np.int_)
        reverse_indexer.put(sorter, np.arange(len(sorter)))

        mask = labels < 0
        labels = reverse_indexer.take(labels)
        np.putmask(labels, mask, -1)

        uniques = uniques.take(sorter)

    if is_datetimetz:

        # reset tz
        uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize(
            values.tz)
    elif is_datetime:
        uniques = uniques.astype('M8[ns]')
    elif is_timedelta:
        uniques = uniques.astype('m8[ns]')
    if isinstance(values, Index):
        uniques = values._shallow_copy(uniques, name=None)
    elif isinstance(values, Series):
        uniques = Index(uniques)
    return labels, uniques
コード例 #53
0
ファイル: cmap.py プロジェクト: GBillotey/Fractal-shades
    def clip(x, ext_min, ext_max):
        np.putmask(x, x < ext_min, ext_min)
        np.putmask(x, x > ext_max, ext_max)
        return x
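Usage sketch of the same two-sided clamp, inlined so it runs standalone:

import numpy as np

x = np.array([-2.0, 0.5, 7.0])
np.putmask(x, x < 0.0, 0.0)     # clamp below
np.putmask(x, x > 1.0, 1.0)     # clamp above
print(x)                        # [0.  0.5 1. ]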
コード例 #54
0
def EliminarFondo(Imagen_Color, Imagen_Profundidad, Distancia, Color_Contorno):
    Columnas, Filas, Dimensiones = Imagen_Color.shape
    for i in range(0, Dimensiones):
        auxiliar = Imagen_Color[:, :, i]
        np.putmask(auxiliar, Imagen_Profundidad > Distancia, Color_Contorno)
        Imagen_Color[:, :, i] = auxiliar
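A toy sketch of the same background-removal idea with made-up data, inlined so it runs standalone: pixels whose depth exceeds the cutoff get the contour color in every channel.

import numpy as np

img = np.zeros((2, 2, 3), dtype=np.uint8)
depth = np.array([[1.0, 5.0],
                  [2.0, 9.0]])
cutoff, contour_color = 3.0, 255
for i in range(img.shape[2]):
    channel = img[:, :, i]                          # view into the image
    np.putmask(channel, depth > cutoff, contour_color)
print(img[:, :, 0])                                 # [[  0 255]
                                                    #  [  0 255]]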
コード例 #55
0
ファイル: cmap.py プロジェクト: GBillotey/Fractal-shades
    def blend(rgb, shade, shade_type=None):
        """
        Provides several "shading" options based on shade_type dict
        *rgb* array of colors to 'shade', shape (nx, 3) or (nx, ny, 3)
        *shade* N&B array shape similar to rgb but last dim is 1
        *shade_type* {"Lch": x1, "overlay": x2, "pegtop": x3}
            x1, x2, x3 positive scalars, the proportion of each shading method
            in the final image.
        """
        if shade_type is None:
            shade_type = {"Lch": 4., "overlay": 4., "pegtop": 1.}

        blend_T = float(
            sum((shade_type.get(key, 0.)
                 for key in ["Lch", "overlay", "pegtop"])))

        is_image = (len(rgb.shape) == 3)
        if is_image:
            imx, imy, ichannel = rgb.shape
            if ichannel != 3:
                raise ValueError("expected an rgb array")
            rgb = np.copy(rgb.reshape(imx * imy, 3))
            shade = np.copy(shade.reshape(imx * imy, 1))

        XYZ = Color_tools.rgb_to_XYZ(rgb[:, 0:3])

        XYZ_overlay = np.zeros_like(XYZ)
        XYZ_pegtop = np.zeros_like(XYZ)
        XYZ_Lch = np.zeros_like(XYZ)

        ref_white = Color_tools.D50_ref_white

        if shade_type.get("overlay", 0.) != 0:
            low = 2. * shade * XYZ
            high = ref_white * 100. - 2. * (1. - shade) * (ref_white * 100. -
                                                           XYZ)
            XYZ_overlay = np.where(XYZ <= 0.5 * ref_white * 100., low, high)

        if shade_type.get("pegtop", 0.) != 0:
            XYZ_pegtop = 2. * shade * XYZ + (1. -
                                             2. * shade) * XYZ**2 / ref_white

        if shade_type.get("Lch", 0.) != 0:
            shade = 2. * shade - 1.
            Lab = Color_tools.XYZ_to_CIELab(XYZ)
            L = Lab[:, 0, np.newaxis]
            a = Lab[:, 1, np.newaxis]
            b = Lab[:, 2, np.newaxis]
            np.putmask(L, shade > 0, L + shade * (100. - L))  # lighten
            np.putmask(L, shade < 0, L * (1. + shade))  # darken
            np.putmask(a, shade > 0, a - shade**2 * a)  # lighten
            np.putmask(a, shade < 0, a * (1. - shade**2))  # darken
            np.putmask(b, shade > 0, b - shade**2 * b)  # lighten
            np.putmask(b, shade < 0, b * (1. - shade**2))  # darken
            Lab[:, 0] = L[:, 0]
            Lab[:, 1] = a[:, 0]
            Lab[:, 2] = b[:, 0]
            XYZ_Lch = Color_tools.CIELab_to_XYZ(Lab)

        XYZ = (XYZ_overlay * shade_type["overlay"] + XYZ_pegtop *
               shade_type["pegtop"] + XYZ_Lch * shade_type["Lch"]) / blend_T

        # Convert modified XYZ back to rgb.
        blend = Color_tools.XYZ_to_rgb(XYZ)
        if is_image:
            blend = blend.reshape([imx, imy, 3])
        return blend
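A minimal sketch of the lighten/darken trick used in the Lch branch, on a single channel; shade is in [-1, 1], positive values push L toward 100 and negative values scale it down:

import numpy as np

L = np.array([20.0, 50.0, 80.0])
shade = np.array([0.5, -0.5, 0.0])
np.putmask(L, shade > 0, L + shade * (100. - L))   # lighten
np.putmask(L, shade < 0, L * (1. + shade))         # darken
print(L)                                           # [60. 25. 80.]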
コード例 #56
0
def patch_image(t_in, s_out, cm=0):
    try:
        t = t_in.copy()
        ty, tx = t.shape
        if cm > 0:
            m = mask_rect(t == cm)
        else:
            m = (t == cm)
        tile = get_tile(t, m)
        if tile.size > 2 and s_out == t.shape:
            rt = np.tile(
                tile, (1 + ty // tile.shape[0], 1 + tx // tile.shape[1]))[0:ty,
                                                                          0:tx]
            if (rt[~m] == t[~m]).all():
                return rt
        for i in range(6):
            m = (t == cm)
            t -= cm
            if tx == ty:
                a = np.maximum(t, t.T)
                if (a[~m] == t[~m]).all(): t = a.copy()
                a = np.maximum(t, np.flip(t).T)
                if (a[~m] == t[~m]).all(): t = a.copy()
            a = np.maximum(t, np.flipud(t))
            if (a[~m] == t[~m]).all(): t = a.copy()
            a = np.maximum(t, np.fliplr(t))
            if (a[~m] == t[~m]).all(): t = a.copy()
            t += cm
            m = (t == cm)
            lms = measure.label(m.astype('uint8'))
            for l in range(1, lms.max() + 1):
                lm = np.argwhere(lms == l)
                x_min = max(0, lm[:, 1].min() - 1)
                x_max = min(lm[:, 1].max() + 2, t.shape[0])
                y_min = max(0, lm[:, 0].min() - 1)
                y_max = min(lm[:, 0].max() + 2, t.shape[1])
                gap = t[y_min:y_max, x_min:x_max]
                sy, sx = gap.shape
                if i == 1:
                    sy //= 2
                    y_max = y_min + sx
                gap = t[y_min:y_max, x_min:x_max]
                sy, sx = gap.shape
                allst = as_strided(t,
                                   shape=(ty, tx, sy, sx),
                                   strides=2 * t.strides)
                allst = allst.reshape(-1, sy, sx)
                allst = np.array(
                    [a for a in allst if np.count_nonzero(a == cm) == 0])
                gm = (gap != cm)
                for a in allst:
                    if sx == sy:
                        fpd = a.T
                        fad = np.flip(a).T
                        if i == 1: gm[sy - 1, 0] = gm[0, sx - 1] = False
                        if (fpd[gm] == gap[gm]).all():
                            gm = (gap != cm)
                            np.putmask(gap, ~gm, fpd)
                            t[y_min:y_max, x_min:x_max] = gap
                            break
                        if i == 1: gm[0, 0] = gm[sy - 1, sx - 1] = False
                        if (fad[gm] == gap[gm]).all():
                            gm = (gap != cm)
                            np.putmask(gap, ~gm, fad)
                            t[y_min:y_max, x_min:x_max] = gap
                            break
                    fud = np.flipud(a)
                    flr = np.fliplr(a)
                    if i == 1:
                        gm[sy - 1,
                           0] = gm[0, sx - 1] = gm[0, 0] = gm[sy - 1,
                                                              sx - 1] = False
                    if (a[gm] == gap[gm]).all():
                        gm = (gap != cm)
                        np.putmask(gap, ~gm, a)
                        t[y_min:y_max, x_min:x_max] = gap
                        break
                    elif (fud[gm] == gap[gm]).all():
                        gm = (gap != cm)
                        np.putmask(gap, ~gm, fud)
                        t[y_min:y_max, x_min:x_max] = gap
                        break
                    elif (flr[gm] == gap[gm]).all():
                        gm = (gap != cm)
                        np.putmask(gap, ~gm, flr)
                        t[y_min:y_max, x_min:x_max] = gap
                        break
        if s_out == t.shape:
            return t
        else:
            m = (t_in == cm)
            return np.resize(t[m], crop_min(m).shape)
    except:
        return np.resize(t_in, s_out)
コード例 #57
0
def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
    """
    Sort ``values`` and reorder corresponding ``labels``.
    ``values`` should be unique if ``labels`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``labels`` is not None.
    labels : list_like
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``na_sentinel``.
    na_sentinel : int, default -1
        Value in ``labels`` to mark "not found".
        Ignored when ``labels`` is None.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``labels`` is None.

    Returns
    -------
    ordered : ndarray
        Sorted ``values``
    new_labels : ndarray
        Reordered ``labels``; returned when ``labels`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``labels`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``labels`` is not None and ``values`` contain duplicates.
    """
    if not is_list_like(values):
        raise TypeError("Only list-like objects are allowed to be passed to "
                        "safe_sort as values")
    values = np.array(values, copy=False)

    def sort_mixed(values):
        # order ints before strings, safe in py3
        str_pos = np.array([isinstance(x, string_types) for x in values],
                           dtype=bool)
        nums = np.sort(values[~str_pos])
        strs = np.sort(values[str_pos])
        return _ensure_object(np.concatenate([nums, strs]))

    sorter = None
    if compat.PY3 and lib.infer_dtype(values) == 'mixed-integer':
        # unorderable in py3 if mixed str/int
        ordered = sort_mixed(values)
    else:
        try:
            sorter = values.argsort()
            ordered = values.take(sorter)
        except TypeError:
            # try this anyway
            ordered = sort_mixed(values)

    # labels:

    if labels is None:
        return ordered

    if not is_list_like(labels):
        raise TypeError("Only list-like objects or None are allowed to be "
                        "passed to safe_sort as labels")
    labels = _ensure_platform_int(np.asarray(labels))

    from pandas import Index
    if not assume_unique and not Index(values).is_unique:
        raise ValueError("values should be unique if labels is not None")

    if sorter is None:
        # mixed types
        (hash_klass, _), values = _get_data_algo(values, _hashtables)
        t = hash_klass(len(values))
        t.map_locations(values)
        sorter = _ensure_platform_int(t.lookup(ordered))

    reverse_indexer = np.empty(len(sorter), dtype=np.int_)
    reverse_indexer.put(sorter, np.arange(len(sorter)))

    mask = (labels < -len(values)) | (labels >= len(values)) | \
        (labels == na_sentinel)

    # (Out of bound indices will be masked with `na_sentinel` next, so we may
    # deal with them here without performance loss using `mode='wrap'`.)
    new_labels = reverse_indexer.take(labels, mode='wrap')
    np.putmask(new_labels, mask, na_sentinel)

    return ordered, _ensure_platform_int(new_labels)
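A small standalone sketch of the relabelling step: labels that indexed the unsorted values are remapped through a reverse indexer, and out-of-bounds or sentinel entries are masked back to na_sentinel:

import numpy as np

values = np.array([30, 10, 20])
labels = np.array([0, 2, 2, -1, 5])      # -1 is the NA sentinel, 5 is out of bounds
sorter = values.argsort()                # [1 2 0]
ordered = values.take(sorter)            # [10 20 30]
reverse_indexer = np.empty(len(sorter), dtype=np.intp)
reverse_indexer.put(sorter, np.arange(len(sorter)))
mask = (labels < -len(values)) | (labels >= len(values)) | (labels == -1)
new_labels = reverse_indexer.take(labels, mode='wrap')
np.putmask(new_labels, mask, -1)
print(ordered, new_labels)               # [10 20 30] [ 2  1  1 -1 -1]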
コード例 #58
0
ファイル: cmap.py プロジェクト: GBillotey/Fractal-shades
    def shade_layer(normal,
                    theta_LS,
                    phi_LS,
                    shininess=0.,
                    ratio_specular=0.,
                    **kwargs):
        """
        *normal* flat array of normal vect
        shade_dict:
            "theta_LS" angle of incoming light [0, 360]
            "phi_LS"   azimuth of incoming light [0, 90] 90 is vertical
            "shininess" material coefficient for specular
            "ratio_specular" ratio of specular to lambert
        Returns 
        *shade* array of light intensity, greyscale image (value between 0 and 1)
        https://en.wikipedia.org/wiki/Blinn%E2%80%93Phong_reflection_model
        """
        if "LS_coords" in kwargs.keys():
            # LS is localized somewhere in the image computing theta_LS
            # as a vector
            LSx, LSy = kwargs["LS_coords"]
            (ix, ixx, iy, iyy) = kwargs["chunk_slice"]
            chunk_mask = kwargs["chunk_mask"]
            nx = kwargs["nx"]
            ny = kwargs["ny"]
            nx_grid = (np.arange(ix, ixx, dtype=np.float32) / nx) - 0.5
            ny_grid = (np.arange(iy, iyy, dtype=np.float32) / ny) - 0.5
            ny_vec, nx_vec = np.meshgrid(ny_grid, nx_grid)
            theta_LS = -np.ravel(np.arctan2(LSy - ny_vec,
                                            nx_vec - LSx)) + np.pi
            if chunk_mask is not None:
                theta_LS = theta_LS[chunk_mask]
        else:
            # Default case LS at infinity incoming angle provided
            theta_LS = theta_LS * np.pi / 180.
        phi_LS = phi_LS * np.pi / 180.

        if "exp_map" in kwargs.keys():
            raise ValueError()  # debug
            # Normal angle correction in case of exponential map
            if kwargs["exp_map"]:
                (ix, ixx, iy, iyy) = kwargs["chunk_slice"]
                chunk_mask = kwargs["chunk_mask"]
                nx = kwargs["nx"]
                ny = kwargs["ny"]
                nx_grid = (np.arange(ix, ixx, dtype=np.float32) / nx) - 0.5
                ny_grid = (np.arange(iy, iyy, dtype=np.float32) / ny) - 0.5
                ny_vec, nx_vec = np.meshgrid(ny_grid, nx_grid)
                expmap_angle = np.ravel(np.exp(-1j * (ny_vec) * np.pi * 2.))
                if chunk_mask is not None:
                    expmap_angle = expmap_angle[chunk_mask]
                normal = normal * expmap_angle

        # k_ambient = - 1. / (2. * ratio_specular + 1.)
        k_lambert = 1.  #- 2. * k_ambient
        k_spec = ratio_specular * k_lambert

        # Light source coordinates
        LSx = np.cos(theta_LS) * np.cos(phi_LS)
        LSy = np.sin(theta_LS) * np.cos(phi_LS)
        LSz = np.sin(phi_LS)

        # Normal vector coordinates - Lambert shading
        nx = normal.real
        ny = normal.imag
        nz = np.sqrt(1. - nx**2 - ny**2)
        if "inverse_n" in kwargs.keys():
            if kwargs["inverse_n"]:
                nx = -nx
                ny = -ny

        lambert = LSx * nx + LSy * ny + LSz * nz
        np.putmask(lambert, lambert < 0., 0.)

        # half-way vector coordinates - Blinn Phong shading
        specular = np.zeros_like(lambert)
        if ratio_specular != 0.:
            phi_half = (np.pi * 0.5 + phi_LS) * 0.5
            half_x = np.cos(theta_LS) * np.sin(phi_half)
            half_y = np.sin(theta_LS) * np.sin(phi_half)
            half_z = np.cos(phi_half)
            spec_angle = half_x * nx + half_y * ny + half_z * nz
            np.putmask(spec_angle, spec_angle < 0., 0.)
            specular = np.power(spec_angle, shininess)

        res = k_lambert * lambert + k_spec * specular  # + k_ambient

        #res[normal == 0.] = 0.5 * (np.nanmin(res) + np.nanmax(res))
        try:
            np.putmask(
                res, normal == 0.,
                np.nanmin(res) + 0.5 * (np.nanmax(res) - np.nanmin(res)))
        except ValueError:
            pass

        return res  # k_ambient + k_lambert * lambert + k_spec * specular
コード例 #59
0
    def replace_atom_types(z):
        np.putmask(z, np.isin(z, list(self.atom_types), invert=True), -1)
        return z
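Usage sketch with a hypothetical set of allowed atom types, inlined so it runs standalone:

import numpy as np

atom_types = {1, 6, 8}                 # hypothetical allowed atomic numbers
z = np.array([1, 6, 7, 8, 26])
np.putmask(z, np.isin(z, list(atom_types), invert=True), -1)
print(z)                               # [ 1  6 -1  8 -1]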
コード例 #60
0
ファイル: ops.py プロジェクト: yazici/pandas
    def f(x):
        x = pa.array(x, dtype=self.dtype)
        np.putmask(x, mask, self.fill_value)
        return x