Example #1
0
def check_input(times,signal,**kwargs):
    """
    Check the input arguments for periodogram calculations for mistakes.
    
    If you get an error when trying to compute a periodogram, and you don't
    understand it, just feed the input you gave to this function, and it will
    perform some basic checks.
    
    The checks are, in order: both inputs are numpy arrays (converted with
    C{np.asarray} if not), both are 1D and of equal shape, neither contains
    nans or infs (offending points are dropped from both arrays), and the
    time array is strictly increasing (both arrays are reordered with
    C{np.argsort(times)} if not). Every check reports its outcome on stdout.
    
    Recognised keyword arguments:
    
        - C{nyq_stat}: passed on to C{getNyquist} to recompute the Nyquist
          frequency that C{fn} is compared with
        - C{fn}: final frequency; a warning is printed if it is larger than
          the Nyquist frequency
    
    @param times: time points
    @param signal: observations, one per time point
    @return: C{False} if the inputs are not 1D or differ in shape, otherwise
        the (possibly repaired) C{times, signal}
    """
    #-- check if the input are arrays and have the same 1D shape
    is_array0 = isinstance(times,np.ndarray)
    is_array1 = isinstance(signal,np.ndarray)
    if not is_array0: print(termtools.red('ERROR: time input is not an array'))
    if not is_array1: print(termtools.red('ERROR: signal input is not an array'))
    if is_array0 and is_array1:
        print(termtools.green("OK: inputs are arrays"))
    else:
        times = np.asarray(times)
        signal = np.asarray(signal)
        print(termtools.green("---> FIXED: inputs are arrays"))
    onedim = times.ndim==1 and signal.ndim==1
    same_shape = times.shape==signal.shape
    if not onedim or not same_shape:
        print(termtools.red('ERROR: input is not 1D or not of same length'))
        return False
    print(termtools.green("OK: inputs are 1D and have same length"))
    #-- check if the signal contains nans or infs:
    isnan0 = np.sum(np.isnan(times))
    isnan1 = np.sum(np.isnan(signal))
    isinf0 = np.sum(np.isinf(times))
    isinf1 = np.sum(np.isinf(signal))
    if isnan0: print(termtools.red('ERROR: time array contains nans'))
    if isnan1: print(termtools.red('ERROR: signal array contains nans'))
    if isinf0: print(termtools.red('ERROR: time array contains infs'))
    if isinf1: print(termtools.red('ERROR: signal array contains infs'))
    if not isnan0 and not isnan1 and not isinf0 and not isinf1:
        print(termtools.green('OK: no infs or nans'))
    else:
        #-- keep only the points that are finite in both arrays (unary minus
        #   on boolean arrays is an error in numpy, so build the mask directly)
        keep = np.isfinite(times) & np.isfinite(signal)
        times,signal = times[keep],signal[keep]
        print(termtools.green('---> FIXED: infs and nans removed'))
    #-- check if the timeseries is sorted (strictly increasing)
    is_sorted = np.all(np.diff(times)>0)
    if not is_sorted:
        print(termtools.red('ERROR: time array is not sorted'))
        sa = np.argsort(times)
        times,signal = times[sa],signal[sa]
        print(termtools.green('---> FIXED: time array is sorted'))
    else:
        print(termtools.green("OK: time array is sorted"))
    print(termtools.green("No inconsistencies found or inconsistencies are fixed"))
    
    #-- check keyword arguments:
    fnyq = getNyquist(times,nyq_stat=np.min)
    print("Default Nyquist frequency: {}".format(fnyq))
    if 'nyq_stat' in kwargs:
        fnyq = getNyquist(times,nyq_stat=kwargs['nyq_stat'])
        print("Nyquist value manually set to {}".format(fnyq))
    if 'fn' in kwargs and kwargs['fn']>fnyq:
        print(termtools.red("Final frequency 'fn' is larger than the Nyquist frequency"))
    return times,signal
def scargle_probability(peak_value,
                        times,
                        freqs,
                        correct_for_frange=False,
                        **kwargs):
    """
    Compute the probability to observe a peak in the Scargle periodogram.

    The returned value is C{1 - (1 - exp(-peak_value))**ni}, where C{ni} is
    the number of independent frequencies, taken to be C{2*len(times)}.

    If C{correct_for_frange=True}, the Bonferroni correction will be applied
    to a smaller number of frequencies (i.e. the independent number of
    frequencies in C{freqs}). To be conservative, set C{correct_for_frange=False}.

    Example simulation:

    >>> times = np.linspace(0,1,5000)
    >>> N = 500
    >>> probs = np.zeros(N)
    >>> peaks = np.zeros(N)
    >>> for i in range(N):
    ...     signal = np.random.normal(size=len(times))
    ...     f,s = scargle(times,signal,threads='max',norm='distribution')
    ...     peaks[i] = s.max()
    ...     probs[i] = scargle_probability(s.max(),times,f)

    Now make a plot:

    >>> p = pl.figure()
    >>> p = pl.subplot(131)
    >>> p = pl.plot(probs,'ko')
    >>> p = pl.plot([0,N],[0.01,0.01],'r-',lw=2)
    >>> p = pl.subplot(132)
    >>> p = pl.plot(peaks[np.argsort(peaks)],probs[np.argsort(peaks)],'ro')
    >>> p = pl.plot(peaks[np.argsort(peaks)],1-(1-np.exp(-np.sort(peaks)))**(10000.),'g-')
    >>> #p = pl.plot(peaks[np.argsort(peaks)],1-(1-(1-np.sort(peaks)/2500.)**2500.)**(10000.),'b--')
    >>> p = pl.subplot(133)
    >>> for i in np.logspace(-3,0,100):
    ...     p = pl.plot([i*100],[np.sum(probs<i)/float(N)*100],'ko')
    >>> p = pl.plot([1e-6,100],[1e-6,100],'r-',lw=2)
    >>> p = pl.xlabel('Should observe this many points below threshold')
    >>> p = pl.ylabel('Observed this many points below threshold')

    ]]include figure]]ivs_timeseries_pergrams_prob.png]

    @param peak_value: height(s) of the peak(s) in the periodogram; expected
        to be non-negative (negative values yield nan)
    @param times: time points of the observations (only the number of points
        is used unless C{correct_for_frange=True})
    @param freqs: frequencies at which the periodogram was computed (only
        used if C{correct_for_frange=True})
    @param correct_for_frange: if True, scale the number of independent
        frequencies with the ratio of the tested frequency range to the
        Nyquist frequency
    @keyword nyq_stat: statistic passed on to C{getNyquist} (default
        C{np.min}); the spelling C{nyqstat} is accepted as well
    @return: probability, same shape as C{peak_value}
    """
    #-- independent frequencies
    nr_obs = len(times)
    ni = 2 * nr_obs
    #-- correct the nr of independent frequencies for the frequency range
    #   that is tested, but only if it is requested
    if correct_for_frange:
        nyq_stat = kwargs.pop('nyqstat', np.min)
        nyq_stat = kwargs.pop('nyq_stat', nyq_stat)
        #-- the statistic is passed positionally: this file spells the
        #   keyword both as 'nyq_stat' and as 'nyqstat'
        nyquist = getNyquist(times, nyq_stat)
        #-- np.ptp instead of the ndarray method, which numpy 2 removed
        ni = int(np.ptp(freqs) / nyquist * ni)
    #-- 1 - (1 - exp(-z))**ni evaluated via log1p/expm1: the direct form
    #   rounds to exactly 0 for high peaks because 1 - exp(-z) rounds to 1.
    #   A peak of 0 gives log1p(-1) = -inf, which correctly maps to p = 1.
    with np.errstate(divide='ignore'):
        log_no_peak = ni * np.log1p(-np.exp(-peak_value))
    p_value = -np.expm1(log_no_peak)
    return p_value
def scargle_probability(peak_value,times,freqs,correct_for_frange=False,**kwargs):
    """
    Compute the probability to observe a peak in the Scargle periodogram.
    
    The probability is C{1 - (1 - exp(-peak_value))**ni}, with C{ni} the
    number of independent frequencies (C{2*len(times)}).
    
    If C{correct_for_frange=True}, the Bonferroni correction will be applied
    to a smaller number of frequencies (i.e. the independent number of
    frequencies in C{freqs}). To be conservative, set C{correct_for_frange=False}.
    
    Example simulation:
    
    >>> times = np.linspace(0,1,5000)
    >>> N = 500
    >>> probs = np.zeros(N)
    >>> peaks = np.zeros(N)
    >>> for i in range(N):
    ...     signal = np.random.normal(size=len(times))
    ...     f,s = scargle(times,signal,threads='max',norm='distribution')
    ...     peaks[i] = s.max()
    ...     probs[i] = scargle_probability(s.max(),times,f)
    
    Now make a plot:
    
    >>> p = pl.figure()
    >>> p = pl.subplot(131)
    >>> p = pl.plot(probs,'ko')
    >>> p = pl.plot([0,N],[0.01,0.01],'r-',lw=2)
    >>> p = pl.subplot(132)
    >>> p = pl.plot(peaks[np.argsort(peaks)],probs[np.argsort(peaks)],'ro')
    >>> p = pl.plot(peaks[np.argsort(peaks)],1-(1-np.exp(-np.sort(peaks)))**(10000.),'g-')
    >>> #p = pl.plot(peaks[np.argsort(peaks)],1-(1-(1-np.sort(peaks)/2500.)**2500.)**(10000.),'b--')
    >>> p = pl.subplot(133)
    >>> for i in np.logspace(-3,0,100):
    ...     p = pl.plot([i*100],[np.sum(probs<i)/float(N)*100],'ko')
    >>> p = pl.plot([1e-6,100],[1e-6,100],'r-',lw=2)
    >>> p = pl.xlabel('Should observe this many points below threshold')
    >>> p = pl.ylabel('Observed this many points below threshold')
    
    ]]include figure]]ivs_timeseries_pergrams_prob.png]
    
    @param peak_value: peak height(s) in the periodogram; should not be
        negative (a negative value results in nan)
    @param times: time points; only their number matters unless
        C{correct_for_frange=True}
    @param freqs: tested frequencies; only used if C{correct_for_frange=True}
    @param correct_for_frange: rescale the number of independent frequencies
        by the tested frequency range divided by the Nyquist frequency
    @keyword nyq_stat: statistic handed to C{getNyquist}, C{np.min} if not
        given; C{nyqstat} is accepted as an alternative spelling
    @return: probability, with the shape of C{peak_value}
    """
    #-- independent frequencies
    nr_obs = len(times)
    ni = 2*nr_obs
    #-- correct the nr of independent frequencies for the frequency range
    #   that is tested, but only if it is requested
    if correct_for_frange:
        nyq_stat = kwargs.pop('nyqstat',np.min)
        nyq_stat = kwargs.pop('nyq_stat',nyq_stat)
        #-- positional on purpose: the keyword is spelled both 'nyq_stat'
        #   and 'nyqstat' in this file
        nyquist = getNyquist(times,nyq_stat)
        #-- ndarray.ptp() no longer exists in numpy 2; np.ptp also takes lists
        ni = int(np.ptp(freqs)/nyquist*ni)
    #-- same quantity as 1-(1-exp(-z))**ni, but computed in log space so that
    #   tiny probabilities are not rounded to zero; z=0 gives log1p(-1)=-inf
    #   and thus a probability of exactly 1
    with np.errstate(divide='ignore'):
        log_complement = ni*np.log1p(-np.exp(-peak_value))
    return -np.expm1(log_complement)