# Common imports for the snippets on this page.
import numpy as np
import matplotlib.pyplot as plt


def percentage_removed(data, percentage=None, threshold=None, embedding_technique=None):
    # For node-centrality embeddings the surviving entries are set to 1
    # (binary adjacency matrix); otherwise the original weights are kept.
    if embedding_technique == "node_centrality":
        fst_val = 1
        snd_val = 0.
    else:
        fst_val = data
        snd_val = 0.
    th, triu = None, None  # defined up front so the return below cannot raise a NameError
    if plt.is_numlike(percentage) and plt.is_numlike(threshold):
        print("You can use only the percentage or the threshold, not both at the same time")
        return -1
    elif plt.is_numlike(percentage):
        # Take the upper triangle of the symmetric matrix without its diagonal,
        # reshape it to a 1-D vector, sort it, and strip the padding zeros.
        triu = np.triu(data, k=1)
        triu = np.sort(np.reshape(triu, [1, len(triu) ** 2]))
        triu = np.trim_zeros(np.squeeze(triu))
        percentage *= 100  # `percentage` is expected as a fraction, e.g. 0.1 for 10%
        th = int(((len(triu) - 1) * percentage) // 100)  # int() so it can be used as an index
        data = np.where(data > triu[th], fst_val, snd_val)  # thresholded adjacency matrix
    elif plt.is_numlike(threshold):
        data = np.where(data > threshold, fst_val, snd_val)  # thresholded adjacency matrix
    return percentage, th, data, triu
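# NOTE: every snippet on this page calls `plt.is_numlike`, a helper that was
# deprecated in matplotlib 2.2 and removed from later releases. A minimal
# stand-in, mirroring the old duck-typed cbook implementation (anything that
# supports `obj + 1`, including NumPy arrays, counts as num-like):
def is_numlike(obj):
    """Stand-in for the removed matplotlib helper."""
    try:
        obj + 1
    except Exception:
        return False
    return True


plt.is_numlike = is_numlike  # lets the legacy `plt.is_numlike(...)` calls on this page run unchanged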
def checkfield(u, v, varnames=["u", "v"]):
    """ Perform sanity checks on the input vector field """
    for nk, arr in enumerate([u, v]):
        varname = varnames[nk]
        try:
            sha = arr.shape
        except AttributeError:
            raise TypeError('Input `' + varname + '` must be a NumPy array, not '
                            + type(arr).__name__ + '!')
        if len(sha) != 2:
            raise ValueError('Input `' + varname + '` must be a 2-D NumPy array!')
        if (min(sha) == 1) or (sha[0] != sha[1]):
            # the check enforces a square matrix, so the message says so
            raise ValueError('Input `' + varname + '` must be a square `N`-by-`N` NumPy array!')
        if not plt.is_numlike(arr) or not np.isreal(arr).all():
            raise TypeError('Input `' + varname + '` must be a real-valued `N`-by-`N` NumPy array!')
        if not np.isfinite(arr).all():
            raise ValueError('Input `' + varname
                             + '` must be a real-valued NumPy array without Infs or NaNs!')
    if u.shape != v.shape:
        raise ValueError("Inputs `" + varnames[0] + "` and `" + varnames[1]
                         + "` must have the same dimension!")
    return
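# Usage sketch (assumes the is_numlike shim above): a square, real-valued
# field passes silently; a non-square array raises.
u = np.random.rand(64, 64)
v = np.random.rand(64, 64)
checkfield(u, v)
try:
    checkfield(u, np.random.rand(64, 32))
except ValueError as err:
    print(err)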
def checkinput(N, ns):
    """ Perform sanity checks on the inputs `N` and `ns` """
    # Sanity checks for `N` and `ns`
    # (the original looped over `enumerate(N, ns)`, which raises a TypeError)
    names = ["N", "ns"]
    for nk, val in enumerate([N, ns]):
        if not np.isscalar(val) or not plt.is_numlike(val) or not np.isreal(val):
            raise TypeError("Input `" + names[nk] + "` must be a real scalar!")
        if not np.isfinite(val):
            raise TypeError("Input `" + names[nk] + "` must be finite!")

    # N
    if round(N) != N:
        raise TypeError("`N` has to be a positive integer!")
    if N <= 1:
        raise ValueError("`N` has to be greater than 1!")

    # ns
    if ns < 0 or ns > 1:
        raise ValueError("`ns` has to be in [0,1]!")
    return
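# Usage sketch:
checkinput(128, 0.5)  # valid: integer N > 1, ns in [0, 1]
for bad_N, bad_ns in [(1, 0.5), (4.5, 0.5), (128, 1.5)]:
    try:
        checkinput(bad_N, bad_ns)
    except (TypeError, ValueError) as err:
        print(err)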
def noise_frames_from_etc(N, height_px, width_px, gain=1, band=None,
                          t_exp=None, etc_input=None, optical_system=None):
    """
    Generate a series of N noise frames with dimensions (height_px, width_px)
    based on the output of exposure_time_calc() (in etc.py). A previous ETC
    output returned by exposure_time_calc() can be supplied, or one can be
    generated if band and t_exp are specified.

    The output is returned in the form of a dictionary allowing the sky, dark
    current, cryostat and read noise contributions to be accessed separately.
    The frame generated by summing each of these components is also returned.

    Important note: we do NOT create master frames here, to avoid confusion.
    The purpose of this routine is to return individual noise frames that can
    be added to images. The master frames must not be created from the same
    frames that are added to images, as this would not be realistic.
    """
    print("Generating noise frames...")

    # The output is stored in a dictionary with each entry containing the noise frames.
    noise_frames_dict = {
        # Note: the sky includes the emission from the telescope.
        'sky': np.zeros((N, height_px, width_px), dtype=int),
        'dark': np.zeros((N, height_px, width_px), dtype=int),
        'cryo': np.zeros((N, height_px, width_px), dtype=int),
        'RN': np.zeros((N, height_px, width_px), dtype=int),
        'total': np.zeros((N, height_px, width_px), dtype=int),
        'gain-multiplied': np.zeros((N, height_px, width_px), dtype=int),
        'unity gain': np.zeros((N, height_px, width_px), dtype=int),
        'post-gain': np.zeros((N, height_px, width_px), dtype=int)
    }

    # Getting noise parameters from the ETC.
    if not etc_input:
        # (`optical_system` was an undeclared global in the original; it is now a keyword argument.)
        if not optical_system:
            print("ERROR: if no ETC input is specified, then you must pass an instance of an opticalSystem!")
            raise UserWarning
        else:
            # If no ETC input is given then we generate a new one.
            if plt.is_numlike(t_exp) and band:
                etc_output = etc.exposure_time_calc(optical_system=optical_system,
                                                    band=band, t_exp=t_exp)
            else:
                print("ERROR: if no ETC input is specified, then to calculate the noise levels you must also specify t_exp and the imaging band!")
                raise UserWarning
    else:
        # Otherwise, we just use whatever was entered.
        etc_output = etc_input

    # Adding noise to each image and multiplying by the detector gain where appropriate.
    noise_frames_dict['sky'] = noise_frames(height_px, width_px, etc_output['unity gain']['N_sky'], N_frames=N) * gain
    noise_frames_dict['dark'] = noise_frames(height_px, width_px, etc_output['unity gain']['N_dark'], N_frames=N) * gain
    noise_frames_dict['cryo'] = noise_frames(height_px, width_px, etc_output['unity gain']['N_cryo'], N_frames=N) * gain
    noise_frames_dict['RN'] = noise_frames(height_px, width_px, etc_output['unity gain']['N_RN'], N_frames=N)

    noise_frames_dict['total'] = noise_frames_dict['sky'] + noise_frames_dict['cryo'] + noise_frames_dict['RN'] + noise_frames_dict['dark']
    noise_frames_dict['gain-multiplied'] = noise_frames_dict['sky'] + noise_frames_dict['cryo'] + noise_frames_dict['dark']
    noise_frames_dict['unity gain'] = noise_frames_dict['gain-multiplied'] / gain
    noise_frames_dict['post-gain'] = noise_frames_dict['RN']

    return noise_frames_dict, etc_output
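# The `noise_frames` helper called above is not shown in this snippet. A
# plausible stand-in (an assumption, not the original implementation) that
# draws Poisson-distributed counts with the given mean per pixel:
def noise_frames(height_px, width_px, lam, N_frames=1):
    """Hypothetical stand-in: Poisson noise frames with mean `lam` counts/pixel."""
    rng = np.random.default_rng()
    return rng.poisson(lam=lam, size=(N_frames, height_px, width_px))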
import scipy.ndimage


def add_tt(image, sigma_tt_px=None, tt_idxs=None):
    if not plt.is_numlike(sigma_tt_px) and not plt.is_numlike(tt_idxs):
        print("ERROR: either sigma_tt_px OR tt_idxs must be specified!")
        raise UserWarning

    # Adding a randomised tip/tilt to the image
    if plt.is_numlike(sigma_tt_px):
        # If no vector of tip/tilt values is specified, then we use random numbers.
        shift_height = np.random.randn() * sigma_tt_px
        shift_width = np.random.randn() * sigma_tt_px
        tt_idxs = [shift_height, shift_width]
    else:
        # Otherwise we take them from the input vector.
        shift_height = tt_idxs[0]
        shift_width = tt_idxs[1]

    # scipy.ndimage.interpolation.shift is deprecated; scipy.ndimage.shift is
    # the same routine under its current name.
    image_tt = scipy.ndimage.shift(image, (shift_height, shift_width))

    return image_tt, tt_idxs
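# Usage sketch (assumes the is_numlike shim above): shift a test image by a
# random tip/tilt, then replay the same shift on another frame.
image = np.zeros((64, 64))
image[32, 32] = 1.0  # a single bright pixel

shifted, tt = add_tt(image, sigma_tt_px=1.5)
print("applied (dy, dx) shift in px:", tt)

# Pass the recorded shifts as an array so the num-like check accepts them.
shifted_again, _ = add_tt(image, tt_idxs=np.asarray(tt))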
import networkx as nx
from pandas import isnull  # `isnull` in the original module comes from pandas


def graph_plot(graph_object, counter, graph_loc, _from=None, _to=None,
               nFigures=None, subplot=False):
    if isnull(nFigures):
        nx.draw(graph_object, nx.spring_layout(graph_object))
    elif not isnull(nFigures) and plt.is_numlike(nFigures) and subplot == False:
        if counter < nFigures:
            plt.figure()
            nx.draw(graph_object, nx.spring_layout(graph_object))
    elif (not isnull(_from) and plt.is_numlike(_from) and not isnull(_to)
          and plt.is_numlike(_to) and subplot == True):
        if _from <= counter < _to:
            # Each graph occupies a 48-row band of the grid. (The original
            # addressed column 3 of a 3-column grid with colspan 4, which
            # matplotlib rejects; column 0 with colspan 3 is used here.)
            plt.subplot2grid((192, 3), (graph_loc, 0), rowspan=48, colspan=3)
            nx.draw(graph_object, nx.spring_layout(graph_object))
            # plt.hold(True) was removed from matplotlib; holding is now the default.

    plt.axis('tight')
    plt.show()
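# Usage sketch (assumes the is_numlike shim above): draw a random graph in its
# own figure via the `nFigures` branch.
G = nx.erdos_renyi_graph(20, 0.2, seed=1)
graph_plot(G, counter=0, graph_loc=0, nFigures=3)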
def scalarcheck(val, varname, kind=None, bounds=None):
    """ Local helper function performing sanity checks on scalars """
    if not np.isscalar(val) or not plt.is_numlike(val):
        raise TypeError("Input `" + varname + "` must be a scalar!")
    if not np.isfinite(val) or not np.isreal(val):
        raise ValueError("Input `" + varname + "` must be real and finite!")
    if kind == 'int':
        if round(val) != val:
            raise ValueError("Input `" + varname + "` must be an integer!")
    if bounds is not None:
        if val < bounds[0] or val > bounds[1]:
            raise ValueError("Input scalar `" + varname + "` must be between "
                             + str(bounds[0]) + " and " + str(bounds[1]) + "!")
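# Usage sketch (assumes the is_numlike shim above):
scalarcheck(5, "nperms", kind='int')    # passes silently
scalarcheck(0.3, "ns", bounds=[0, 1])   # passes silently
try:
    scalarcheck(2.5, "nperms", kind='int')
except ValueError as err:
    print(err)  # Input `nperms` must be an integer!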
def plot(self, color='gray', normed=True, barPlot=True):
    """Uses matplotlib to generate a minimalist histogram.

    Parameters
    ----------
    color : any valid matplotlib color (e.g. 'red', 'LightBrown' or (0.5, 0.1, 0.9))
    normed : bool
        A normed histogram has fractional frequencies as heights.
    barPlot : bool
        True (default) produces a bar plot as opposed to a line with markers.

    Returns
    -------
    axh : matplotlib axes handle
    """
    # `is_numlike`, `bar`, `plot` and `xticks` are expected as globals here
    # (pylab-style imports in the original module). Note that, despite the
    # docstring, no axes handle is actually returned.
    if all([is_numlike(k) for k in list(self.keys())]):
        # If the keys are numbers then use them directly on the x-axis scale
        if all([round(k) == k for k in list(self.keys())]):
            xvec = [int(k) for k in sorted(self.keys())]
        else:
            xvec = sorted(self.keys())
        xlab = xvec
    else:
        xlab = sorted(self.keys())
        xvec = np.arange(len(xlab))

    if normed:
        yDict = self.freq()
    else:
        yDict = self

    if barPlot:
        for x, k in zip(xvec, xlab):
            bar(x, yDict[k], align='center', color=color)
    else:
        plot(xvec, [yDict[k] for k in xlab], 's-', color=color)
    xticks(xvec, xlab)
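# Minimal harness (an assumption: the method above lives on a dict-like class
# with a `freq` method, with pylab-style globals and the is_numlike shim from
# the top of this page). Only the bar-plot path is exercised, since the bare
# `plot(...)` call in the line path would resolve to the function above in
# this stripped-down, single-module setting.
from matplotlib.pyplot import bar, xticks, show  # the snippet's bare calls


class Hist(dict):
    """Hypothetical host class: a dict of value -> count."""
    def freq(self):
        total = float(sum(self.values()))
        return {k: v / total for k, v in self.items()}


Hist.plot = plot  # attach the function above as a method

h = Hist({1: 1, 2: 2, 3: 3})
h.plot(color='steelblue', barPlot=True)
show()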
import os
import string
import itertools
from datetime import datetime


def perm_test(X, Y, paired=None, useR=False, nperms=10000, tail='two',
              correction="maxT", get_dist=False, mth="t", verbose=True,
              fname=None, vars=None, g1str=None, g2str=None):
    """
    Perform permutation tests for paired/unpaired uni-/multi-variate two-sample problems

    Parameters
    ----------
    X : NumPy 2darray
        An #samples-by-#variables array holding the data of the first group
    Y : NumPy 2darray
        An #samples-by-#variables array holding the data of the second group
    paired : bool
        Switch to indicate whether the two data-sets `X` and `Y` represent
        paired (`paired = True`) or unpaired data.
    useR : bool
        Switch that determines whether the `R` library `flip` is used for
        testing. Note: unpaired data can only be tested in `R`!
    nperms : int
        Number of permutations for shuffling the input data
    tail : str
        The alternative hypothesis the data is tested against. If
        `tail = 'less'`, then the null is tested against the alternative that
        the mean of the first group is less than the mean of the second group
        ('lower tailed'). Alternatively, `tail = 'greater'` indicates the
        alternative that the mean of the first group is greater than the mean
        of the second group ('upper tailed'). For `tail = 'two'` the
        alternative hypothesis is that the means of the data are different
        ('two tailed').
    correction : str
        Multiplicity correction method. If the `R` package `flip` is not used
        for testing (`useR = False`) this option is ignored, since `MNE`'s
        permutation t-test only supports `p`-value correction using the
        maximal test statistic `Tmax` [2]_. Otherwise (either if
        `paired = False` or `useR = True`) the `R` library `flip` is used,
        which supports the options "holm", "hochberg", "hommel", "bonferroni",
        "BH", "BY", "fdr", "none", "Fisher", "Liptak", "Tippett",
        "MahalanobisT", "MahalanobisP", "minP", "maxT", "maxTstd", "sumT",
        "Direct", "sumTstd", "sumT2" (see [1]_ for a detailed explanation).
        By default "maxT" is used.
    get_dist : bool
        Switch that determines whether the sampling distribution used for
        testing is returned (by default it is not returned).
    mth : str
        Only relevant if testing is done in `R` (`useR = True` or
        `paired = False`). If `mth` is not specified a permutation t-test will
        be performed. Available (but completely untested!) options are: "t",
        "F", "ANOVA", "Kruskal-Wallis", "kruskal", "Mann-Whitney", "sum",
        "Wilcoxon", "rank", "Sign" (see [1]_ for details). Note that by design
        this wrapper only supports two-sample problems (`X` and `Y`). To
        analyze `k`-sample data using, e.g., an ANOVA, please refer to the
        `flip` package directly.
    verbose : bool
        If `verbose = True` then intermediate results, progression messages
        and a table holding the final statistical evaluation are printed to
        the prompt.
    fname : str
        If provided, testing results are saved to the csv file `fname`. The
        file-name can be provided with or without the extension '.csv'
        (WARNING: existing files will be overwritten!). By default, the output
        is not saved.
    vars : list or NumPy 1darray
        Names of the variables that are being tested. Only relevant if
        `verbose = True` and/or `fname` is not `None`. If `vars` is `None` and
        output is shown and/or saved, a generic list
        `['Variable 1', 'Variable 2', ...]` will be used in the table
        summarizing the final results.
    g1str : str
        Name of the first sample. Only relevant if `verbose = True` and/or
        `fname` is not `None`. If `g1str = None` and output is shown/saved a
        generic group name ('Group 1') will be used in the table showing the
        final results.
    g2str : str
        Name of the second sample. Only relevant if `verbose = True` and/or
        `fname` is not `None`. If `g2str = None` and output is shown/saved a
        generic group name ('Group 2') will be used in the table showing the
        final results.

    Returns
    -------
    stats_dict : dictionary
        Test-results are saved in a Python dictionary. By default `stats_dict`
        has the keys 'pvals' (the adjusted `p`-values) and 'statvals' (values
        of the test statistic observed for all variables). If
        `get_dist = True` then an additional entry 'dist' is created for the
        employed sampling distribution.

    Notes
    -----
    This routine is merely a wrapper and does not do any heavy computational
    lifting. In case of paired data and `useR = False` the function
    `permutation_t_test` of the `MNE` package [2]_ is called. If the samples
    are independent (`paired = False`) or `useR = True` the `R` library `flip`
    [1]_ is loaded. Thus, this routine has a number of dependencies: for
    paired data at least the Python package `mne` is required; unpaired
    samples can only be tested if `pandas` as well as `rpy2` (for `R`/Python
    conversion) and, of course, `R` and the `R`-library `flip` are installed
    (and in the search path). To show/save results the routine `printstats`
    (part of this module) is called.

    See also
    --------
    printstats : routine to pretty-print results computed by a hypothesis test
    flip : an `R` library for uni-variate and multi-variate permutation (and
           rotation) tests, currently available
           `here <https://cran.r-project.org/web/packages/flip/index.html>`_
    mne : a software package for processing magnetoencephalography (MEG) and
          electroencephalography (EEG) data, currently available at the Python
          Package Index `here <https://pypi.python.org/pypi/mne/0.7.1>`_

    Examples
    --------
    Assume we want to analyze medical data of 200 healthy adult subjects
    collected before and after physical exercise. For each subject, we have
    measurements of heart-rate (HR), blood pressure (BP) and body temperature
    (BT) before and after exercise. Thus our data sets contain 200
    observations of 3 variables. We want to test the data for a statistically
    significant difference in any of the three observed quantities (HR, BP,
    BT) after physical exercise compared to the measurements acquired before
    exercise. Assume all samples are given as Python lists: `HR_before`,
    `BP_before`, `BT_before`, `HR_after`, `BP_after`, `BT_after`. To be able
    to use `perm_test`, we collect the data in NumPy arrays:

    >>> import numpy as np
    >>> X = np.zeros((200,3))
    >>> X[:,0] = HR_before
    >>> X[:,1] = BP_before
    >>> X[:,2] = BT_before
    >>> Y = np.zeros((200,3))
    >>> Y[:,0] = HR_after
    >>> Y[:,1] = BP_after
    >>> Y[:,2] = BT_after

    Our null-hypothesis is that physical exercise did not induce a significant
    change in any of the observed variables. As an alternative hypothesis, we
    assume that exercise induced an increase in heart rate, blood pressure and
    body temperature. To test our hypotheses we use the following command

    >>> perm_test(X,Y,paired=True,nperms=20000,tail='less',fname='stats.csv',
    >>>           vars=['Heart Rate','Blood Pressure','Body Temperature'],
    >>>           g1str='Before Exercise',g2str='After Exercise')

    which performs a lower-tailed paired permutation t-test with 20000
    permutations, prints the results to the prompt and also saves them in the
    file `stats.csv`.

    References
    ----------
    .. [1] F. Pesarin. Multivariate Permutation Tests with Applications in
           Biostatistics. Wiley, New York, 2001.
    .. [2] A. Gramfort, M. Luessi, E. Larson, D. Engemann, D. Strohmeier,
           C. Brodbeck, L. Parkkonen, M. Haemaelaeinen. MNE software for
           processing MEG and EEG data. NeuroImage 86, 446-460, 2014.
    """
    # Check mandatory inputs and make sure `X` and `Y` are tested for the same no. of variables
    try:
        [nsamples_x, n_testsx] = X.shape
    except Exception:
        raise TypeError('First input `X` has to be a NumPy 2darray!')
    try:
        [nsamples_y, n_testsy] = Y.shape
    except Exception:
        raise TypeError('Second input `Y` has to be a NumPy 2darray!')
    if n_testsx != n_testsy:
        raise ValueError('Number of variables different in `X` and `Y`!')
    n_tests = n_testsx
    for arr in [X, Y]:
        if not np.issubdtype(arr.dtype, np.number) or not np.isreal(arr).all():
            raise ValueError('Inputs `X` and `Y` must be real-valued NumPy 2darrays!')
        if not np.isfinite(arr).all():
            raise ValueError('Inputs `X` and `Y` must be real-valued NumPy 2darrays without Infs or NaNs!')
    if np.min([nsamples_x, nsamples_y]) < n_tests:
        print("WARNING: Number of variables > number of samples!")

    # Check `paired` and make sure that input arrays make sense in case we have paired data
    if not isinstance(paired, bool):
        raise TypeError("The switch `paired` has to be Boolean!")
    if nsamples_x != nsamples_y and paired == True:
        raise ValueError('Cannot perform paired test with different number of samples!')
    pairlst = ["unpaired", "paired"]

    # Check `useR`
    if not isinstance(useR, bool):
        raise TypeError("The switch `useR` has to be Boolean!")
    if not paired:
        useR = True

    # Check `get_dist`
    if not isinstance(get_dist, bool):
        raise TypeError("The switch `get_dist` has to be Boolean!")

    # Check `nperms`
    if not np.isscalar(nperms) or not plt.is_numlike(nperms) or not np.isreal(nperms):
        raise TypeError("The number of permutations has to be provided as scalar!")
    if not np.isfinite(nperms):
        raise TypeError("The number of permutations must be finite!")
    if round(nperms) != nperms:
        raise ValueError("The number of permutations must be an integer!")

    # Check `mth`
    if not isinstance(mth, str):
        raise TypeError("The test-statistic has to be specified using a string, not "
                        + type(mth).__name__ + "!")
    if useR:
        if paired:
            supported = ["t", "Wilcoxon", "rank", "Sign", "sum"]
            data_kind = "PAIRED"
        else:
            supported = ["t", "F", "ANOVA", "Kruskal-Wallis", "kruskal", "Mann-Whitney", "sum"]
            data_kind = "UNPAIRED"
        if mth not in supported:
            # (the original referenced `sp_str` before assigning it; build it first)
            sp_str = str(supported).replace('[', '').replace(']', '')
            raise ValueError('Unsupported method ' + str(mth) +
                             '. Available options for ' + data_kind + ' data are: ' + sp_str)
    else:
        if mth != "t":
            print("WARNING: The optional argument `mth` will be ignored since R will not be used!")

    # Check `tail` if provided
    if not isinstance(tail, str):
        raise TypeError("The alternative hypothesis has to be specified using a string, not "
                        + type(tail).__name__ + "!")
    supported = {'greater': 1, 'less': -1, 'two': 0}
    if tail not in supported:
        sp_str = str(list(supported.keys())).replace('[', '').replace(']', '')
        raise ValueError("The alternative hypothesis given by tail = '" + str(tail) +
                         "' is invalid. Available options are: " + sp_str)

    # Save tail selection for output before we convert it to an integer
    tail_dt1 = {"less": "less than", "two": "different from", "greater": "greater than"}
    tail_dt2 = {"less": "lower", "two": "two", "greater": "upper"}
    tail_st1 = tail_dt1[tail]
    tail_st2 = tail_dt2[tail]

    # Now convert string-tail to numeric value: (lower, two, upper) -> (-1, 0, +1)
    tail = supported[tail]

    # Check the setting for the p-value correction
    if not isinstance(correction, str):
        raise TypeError("The multiplicity correction scheme has to be specified using a string, not "
                        + type(correction).__name__ + "!")
    if useR:
        supported = ["holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none",
                     "Fisher", "Liptak", "Tippett", "MahalanobisT", "MahalanobisP", "minP",
                     "maxT", "maxTstd", "sumT", "Direct", "sumTstd", "sumT2"]
        if correction not in supported:
            sp_str = str(supported).replace('[', '').replace(']', '')
            raise ValueError("The multiplicity correction method given by correction = '" +
                             str(correction) + "' is invalid. Available options are: " + sp_str)
    else:
        if correction != "maxT":
            print("WARNING: The stats toolbox in MNE only supports standard Tmax correction of p-values!")

    # Check if the user wants to see what's going on
    if not isinstance(verbose, bool):
        raise TypeError("The switch `verbose` has to be Boolean!")

    # If a file-name was provided make sure it's a string and check if the path exists
    if fname is not None:
        if not isinstance(fname, str):
            raise TypeError("Filename has to be provided as string, not " + type(fname).__name__ + "!")
        fname = str(fname)
        if fname.find("~") == 0:
            fname = os.path.expanduser('~') + fname[1:]
        slash = fname.rfind(os.sep)
        if slash >= 0 and not os.path.isdir(fname[:fname.rfind(os.sep)]):
            raise ValueError('Invalid path for output file: ' + fname + '!')

    # Warn if output was turned off but labels were provided, and assign default
    # values to labels if necessary
    # (Do error checking here to avoid a breakdown at the very end of the code...)
    if verbose == False and fname is None:
        for chk in [vars, g1str, g2str]:
            if chk is not None:
                print("WARNING: Output labels were provided but `verbose == False` and "
                      "`fname == None`. The labels will be ignored and no output will be shown/saved!")
                break
    else:
        if vars is None:
            vars = ['Variable ' + str(v) for v in range(1, n_tests + 1)]
        else:
            if not isinstance(vars, (list, np.ndarray)):
                raise TypeError('Variable names have to be provided as Python list/NumPy 1darray, not '
                                + type(vars).__name__ + '!')
            if len(vars) != n_tests:
                raise ValueError('Number of variable labels for output and number of tests do not match up!')
            for var in vars:
                if not isinstance(var, str):
                    raise TypeError('All variables in the optional input `vars` must be strings!')
        if g1str is None:
            g1str = 'Group 1'
        elif not isinstance(g1str, str):
            raise TypeError('The optional column label `g1str` has to be a string!')
        if g2str is None:
            g2str = 'Group 2'
        elif not isinstance(g2str, str):
            raise TypeError('The optional column label `g2str` has to be a string!')

    # Initialize the output dictionary
    stats_dict = {}

    # Here we go: in case of paired samples and hatred for R, use Python's mne
    if paired == True and useR == False:

        # Try to import/load everything we need below
        try:
            import mne
        except ImportError:
            raise ImportError("The Python module `mne` is not installed!")

        # Just to double check with user, say what's about to happen
        print("\nTesting statistical mean-difference of paired samples using the permutation t-test from `mne`")

        # Perform the actual testing
        statvals, pvals, dist = mne.stats.permutation_t_test(X - Y, n_permutations=nperms,
                                                             tail=tail, n_jobs=1, verbose=False)

        # Store result in output dictionary
        stats_dict['pvals'] = pvals
        stats_dict['statvals'] = statvals
        if get_dist:
            stats_dict['dist'] = dist

    # Otherwise fire up R and use `flip`
    else:

        # Try to import/load everything we need below (rpy2 2.x API)
        try:
            import pandas as pd
            import rpy2.robjects.numpy2ri
            rpy2.robjects.numpy2ri.activate()
            from rpy2.robjects import pandas2ri
            pandas2ri.activate()
            from rpy2.robjects.packages import importr
            from rpy2.robjects import Formula

            # Set up our R name-spaces and see if `flip` is available
            R = rpy2.robjects.r
            flip = importr('flip')
        except Exception:
            msg = "Either the Python modules `pandas` and/or `rpy2` or " + \
                  "the R package `flip` is/are not installed!"
            raise ImportError(msg)

        # Just to double check with user, say what's about to happen
        print("\nPerforming a permutation " + mth + "-test of " + pairlst[paired] +
              " samples using the `R` package `flip`")

        # Construct a list of strings of the form
        # ['a','b','c',...,'z','aa','ab','ac',...,'az','ba','bb','bc',...]
        abclist = (list(string.ascii_lowercase) +
                   [''.join(x) for x in itertools.product(string.ascii_lowercase,
                                                          repeat=2)])[:n_tests] + ['group']

        # Use that list to build a string of the form 'a + b + c + ... + aa + ab + ... ~ group'
        frm_str = abclist[0]
        for ltr in abclist[1:-1]:
            frm_str += ' + ' + ltr
        frm_str += ' ~ group'

        # Construct an array that will be our factor in the R dataframe below:
        # all rows of `X` are assigned the factor-level 1, the rest is 2
        group = 2 * np.ones((nsamples_x + nsamples_y, 1))
        group[:nsamples_x] = 1

        # Stack `X` and `Y` on top of each other, with columns labeled by `abclist`;
        # in case of paired data, also append a stratification vector
        dfmat = np.hstack([np.vstack([X, Y]), group])
        stratarg = rpy2.rinterface.R_NilValue
        if paired:
            abclist += ['pairing']
            dfmat = np.hstack([dfmat, np.tile(np.arange(1, nsamples_x + 1), (1, 2)).T])
            stratarg = Formula("~pairing")

        # Create a pandas dataframe with columns labeled by `abclist` and
        # immediately convert it to an R-dataframe
        r_dframe = pandas2ri.py2ri(pd.DataFrame(dfmat, columns=abclist))

        # Convert the string to an R formula
        r_frm = Formula(frm_str)

        # Do the actual testing in R
        result = R.flip(r_frm, data=r_dframe, tail=tail, perms=nperms, statTest=mth,
                        Strata=stratarg, testType="permutation")
        result = flip.flip_adjust(result, method=correction)

        # Extract values from this R nightmare
        stats_dict['statvals'] = pandas2ri.ri2py(result.slots['res'][1])
        stats_dict['pvals'] = pandas2ri.ri2py(result.slots['res'][4])
        if get_dist:
            stats_dict['dist'] = pandas2ri.ri2py(result.slots['permT'])

        print("Done")

    # If wanted, print/save the results
    if verbose or fname is not None:

        # Construct string to be used as footer for the output file/last line of command line output
        permstr = "using " + str(nperms) + " permutations under the alternative hypothesis that " + \
                  g1str + " is " + tail_st1 + " " + g2str + " (" + tail_st2 + "-tailed) "
        if not useR:
            ft = "Statistical significance of group differences between paired samples was assessed using the " + \
                 "permutation t-test from the Python package MNE" + \
                 " (see http://martinos.org/mne/stable/mne-python.html)\n" + \
                 permstr + "\n" + \
                 "adjusted for multiple comparisons using the maximal test statistic Tmax. "
        else:
            ft = "Statistical significance of group-differences between " + pairlst[paired] + \
                 " samples was assessed using a " + mth + "-test" + \
                 " from the R library flip (https://cran.r-project.org/web/packages/flip/index.html)\n" + \
                 permstr + "\n" + \
                 "adjusted for multiple comparisons based on a " + correction + " approach. \n"

        # Append an auto-gen message and add current date/time to the soon-to-be footer
        ft += "Results were computed by stats_tools.py on " + str(datetime.now())

        # Call printstats to do the heavy lifting
        printstats(vars, stats_dict['pvals'], X, Y, g1str, g2str, foot=ft,
                   verbose=verbose, fname=fname)

    # Return the filled dictionary
    return stats_dict
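# Usage sketch with synthetic paired data; the paired/useR=False path needs
# the `mne` package, and `verbose=False` avoids the module's `printstats`
# helper, which is not shown on this page.
rng = np.random.RandomState(0)
X = rng.randn(200, 3)                   # e.g. measurements "before"
Y = X + 0.25 + 0.1 * rng.randn(200, 3)  # shifted copies: "after"
res = perm_test(X, Y, paired=True, useR=False, nperms=5000, tail='less',
                verbose=False)
print(res['pvals'])  # expect small p-values for all three variables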
import scipy.io
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.basemap import Basemap
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
from clawpack.visclaw import colormaps  # assumed source of add_colormaps (see geoclaw link below)


def ps(dosave=True, fname='figures/domains.png', lont=None, latt=None, ht=None, dd=None):
    '''
    Plot bathymetry of Puget Sound, Admiralty Inlet, and Admiralty Head.

    Inputs:
     dosave      Save figure
     fname       File name for figure
     lont, latt  Transect points to plot, if they are input
     ht          Depth along transect, if input
     dd          Distance in meters along transect
    '''
    # Read in bathymetry, which can be downloaded from:
    # http://figshare.com/preview/_preview/1165560 (27.3MB)
    mat = scipy.io.loadmat('cascadia_gridded.mat')

    # x and y limits for these plots
    lonlimsPS = np.array([-124., -122.15])
    latlimsPS = np.array([47.02, 48.82])
    lonlimsAI = np.array([-122.85, -122.535])
    latlimsAI = np.array([47.9665, 48.228])
    lonlimsAH = np.array([-122.72, -122.64])
    latlimsAH = np.array([48.12, 48.18])

    # Functionality copied from
    # https://github.com/clawpack/geoclaw/blob/master/src/python/geoclaw/topotools.py#L873
    land_cmap = plt.get_cmap('Greens_r')
    sea_cmap = plt.get_cmap('Blues_r')
    cmapPS = colormaps.add_colormaps((land_cmap, sea_cmap),
                                     data_limits=[-375, 2500], data_break=0.0)
    cmapAI = 'Blues_r'
    cmapAH = 'Blues_r'

    # levels to plot
    levsPS = np.concatenate((np.arange(-375, 0, 25), np.arange(0, 3000, 500)))
    levsAI = np.arange(-200, 20, 20)
    levsAH = np.arange(-120, 15, 15)

    # use basemap
    basemapPS = Basemap(llcrnrlon=lonlimsPS[0], llcrnrlat=latlimsPS[0],
                        urcrnrlon=lonlimsPS[1], urcrnrlat=latlimsPS[1],
                        lat_0=latlimsPS.mean(), lon_0=lonlimsPS.mean(),
                        projection='lcc', resolution='f', area_thresh=0.)
    xPS, yPS = basemapPS(mat['lon_topo'], mat['lat_topo'])
    xlimsAI, ylimsAI = basemapPS(lonlimsAI, latlimsAI)
    xlimsAH, ylimsAH = basemapPS(lonlimsAH, latlimsAH)

    # Make Puget Sound plot
    fig = plt.figure(figsize=(16, 16))
    axPS = fig.add_subplot(111)
    basemapPS.drawcoastlines(ax=axPS)
    mappablePS = axPS.contourf(xPS, yPS, mat['z_topo'], cmap=cmapPS,
                               levels=levsPS, zorder=2)
    # At most 6 ticks each for meridians (longitude) and parallels (latitude).
    # (The original swapped the names `pars`/`mers`; the values drawn are the same.)
    locator = MaxNLocator(6)
    locator.create_dummy_axis()
    locator.set_bounds(lonlimsPS[0], lonlimsPS[1])
    mers = locator()
    locator = MaxNLocator(6)
    locator.create_dummy_axis()
    locator.set_bounds(latlimsPS[0], latlimsPS[1])
    pars = locator()
    basemapPS.drawparallels(pars, dashes=(1, 1), linewidth=0.15, labels=[1, 0, 0, 0], ax=axPS)
    basemapPS.drawmeridians(mers, dashes=(1, 1), linewidth=0.15, labels=[0, 0, 0, 1], ax=axPS)
    cbPS = fig.colorbar(mappablePS, pad=0.015, aspect=35)
    cbPS.set_label('Height/depth [m]')

    # Label
    axPS.text(0.8, 0.025, 'Puget Sound', transform=axPS.transAxes, color='0.15')

    # Inset magnified plot of Admiralty Inlet
    axAI = zoomed_inset_axes(axPS, 2, loc=1)
    basemapPS.drawcoastlines(ax=axAI)
    basemapPS.fillcontinents('darkgreen', ax=axAI)
    mappableAI = axAI.contourf(xPS, yPS, mat['z_topo'], cmap=cmapAI, levels=levsAI)
    axAI.set_xlim(xlimsAI)
    axAI.set_ylim(ylimsAI)

    # Inlaid colorbar
    caxAI = fig.add_axes([0.581, 0.665, 0.011, 0.1])
    cbAI = plt.colorbar(mappableAI, cax=caxAI, orientation='vertical')
    cbAI.ax.tick_params(labelsize=12)

    # draw a bbox of the region of the inset axes in the parent axes and
    # connecting lines between the bbox and the inset axes area
    mark_inset(axPS, axAI, loc1=2, loc2=4, fc="none", ec="0.3", lw=1.5, zorder=5)

    # Label
    axAI.text(0.41, 0.83, 'Admiralty\n Inlet', transform=axAI.transAxes,
              color='0.15', fontsize=16)

    # Inset magnified plot of Admiralty Head
    axAH = zoomed_inset_axes(axPS, 9, loc=3)
    basemapPS.drawcoastlines(ax=axAH)
    basemapPS.fillcontinents('darkgreen', ax=axAH)
    mappableAH = axAH.contourf(xPS, yPS, mat['z_topo'], cmap=cmapAH, levels=levsAH)
    axAH.set_xlim(xlimsAH)
    axAH.set_ylim(ylimsAH)

    if plt.is_numlike(lont):  # add transect points if you have some
        xt, yt = basemapPS(lont, latt)
        axAH.plot(xt, yt, 'k', lw=3)

    # Inlaid colorbar
    caxAH = fig.add_axes([0.398, 0.116, 0.012, 0.15])
    cbAH = plt.colorbar(mappableAH, cax=caxAH, orientation='vertical')
    cbAH.ax.tick_params(labelsize=12)

    # draw a bbox of the region of the inset axes in the parent axes and
    # connecting lines between the bbox and the inset axes area
    mark_inset(axPS, axAH, loc1=2, loc2=4, fc="none", ec="0.3", lw=1.5, zorder=5)

    # Label
    axAH.text(0.47, 0.92, 'Admiralty Head', transform=axAH.transAxes,
              color='0.15', fontsize=16)

    if plt.is_numlike(lont):
        # Add axes to plot transect depths
        axdepths = fig.add_axes([0.28, 0.39, 0.14, 0.075], zorder=11)
        axdepths.plot((np.arange(lont.size) * dd) / 1000., -ht, '0.2', lw=2, zorder=12)
        axdepths.tick_params(axis='both', colors='0.1', top='off', right='off',
                             width=2, length=4, labelsize=12, labelcolor='0.1')
        axdepths.spines['bottom'].set_color('none')
        axdepths.spines['top'].set_color('none')
        axdepths.spines['left'].set_color('none')
        axdepths.spines['right'].set_color('none')
        axdepths.set_xlabel('Distance along transect [km]', fontsize=14, color='0.1')
        axdepths.set_ylabel('Transect depth [m]', fontsize=14, color='0.1')
        axdepths.patch.set_alpha(0.0)  # make background transparent

    fig.show()

    # Save figure
    if dosave:
        fig.savefig(fname, bbox_inches='tight')
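# Hedged usage sketch: requires `cascadia_gridded.mat` in the working
# directory (figshare link above) plus the now-archived Basemap toolkit.
ps(dosave=False)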
import pyfusion
# `wait_for_confirmation` and `debug_` are helpers from the surrounding
# pyfusion code and are assumed to be in scope here.


def find_shot_times(shot=None, diag='W7X_UTDU_LP10_I', threshold=0.2,
                    margin=[.3, .7], debug=0, duty_factor=0.12, max_iters=40,
                    secs_rel_t1=False, exceptions=(LookupError,), nsamples=2000):
    """ Return the actual interesting times in utc (absolute) for a given shot,
    based on the given diag.

    secs_rel_t1: [False] - if True, return in seconds relative to trigger 1

    We use raw data to allow for both 1 and 10 ohm resistors (see above common mode sig).
    Tricky shots are [20171025, 51] (no sweep) and [20171025, 54] (no sweep or plasma).
    See the test routine in __main__ when this is 'run'.

    Returns None if there is a typical expected exception (e.g. LookupError).
    Occasional problem using a probe signal if startup is delayed - try ECRH.
    Would be good to make sure the start was before t1, but this code does not
    access t1 at the moment.

    debug=1 or more plots the result.
    """
    dev_name = "W7X"  # diag = W7X_UTDU_LP10_I has less pickup than other big channels
    dev = pyfusion.getDevice(dev_name)
    nsold = pyfusion.NSAMPLES
    pyfusion.RAW = 1  # allow for both 10 and 1 ohm sensing resistors
    # This should include a test for interactive use, so big save jobs don't stall here
    if margin[0] < 0:
        wait_for_confirmation('You will not save t1? with margin={m}\n {h}(n/q/y)'
                              .format(m=str(margin), h=__doc__))
    try:
        pyfusion.NSAMPLES = nsamples
        dev.acq.repair = -1
        save_db = pyfusion.DEBUG
        if plt.is_numlike(pyfusion.DEBUG):
            pyfusion.DEBUG -= 2  # lower debug level to allow us past here
        data = dev.acq.getdata(shot, diag, exceptions=())
    except exceptions as reason:
        # return None on a typical expected exception
        print('Exception suppressed: ', str(reason), ' on channel', diag)
        return None
    except Exception as reason:
        print('Exception NOT suppressed: ', str(reason), ' on channel', diag)
        raise
    finally:  # this is executed always, even if the except code returns
        pyfusion.DEBUG = save_db
        pyfusion.NSAMPLES = nsold
        pyfusion.RAW = 0
        print('params restored')
    debug_(pyfusion.DEBUG, 3, key='find_shot_times')

    # If the timebase is not an int, probably a nan, so use the raw dim in params if there
    if (not isinstance(data.timebase[1], int) and hasattr(data, 'params')
            and 'diff_dimraw' in data.params):
        data.timebase = data.params['diff_dimraw'].cumsum()
    if len(np.unique(np.diff(data.timebase))) > 1:
        # test for minmax reduction - it requires a higher DF than a fully sampled signal
        duty_factor = min(duty_factor * 4, 0.9)
    tb = data.timebase
    sig = data.signal
    if shot[0] > 1e9:  # a ns timebase and shot start and end
        wvalid_times = np.where((tb >= shot[0]) & (tb <= shot[1]))[0]
    else:  # guess suitable bounds for shot
        wvalid_times = np.where((np.diff(tb) < 1e8) & (np.diff(tb) > 0))[0]
    # We take care to get a good estimate of the sampled length.
    # The previous simple version got into a loop because of inaccurate calcs.
    tsamplen = tb[wvalid_times[-1]] - tb[wvalid_times[0]]
    print(shot[0], len(wvalid_times), 'valid time values',
          len(tb) - len(wvalid_times), 'invalid')
    sig = sig[wvalid_times]
    tb = tb[wvalid_times]
    for trial in range(max_iters):
        wbig = np.where(np.abs(sig) > np.abs(threshold))[0]
        if len(wbig) < 5:
            threshold *= 0.8
            continue
        times = np.array([tb[wbig.min()], tb[wbig.max()]], dtype=np.int64)
        if debug > 1:
            print([tmm / 1e9 for tmm in times], np.diff(times) / 1e9,
                  wbig.min(), wbig.max(), 'tsamplen (s) = ', tsamplen / 1e9)
        # requiring fract_samples relative to fract_time avoids the influence of spikes
        fract_time = (times[1] - times[0]) / float(tsamplen)
        fract_samples = len(wbig) / float(len(tb))
        if debug > 0:
            print('trial {t}, lentb {lentb}, thresh {thresh:.3f}, fract_time {fract_time:.3f}, '
                  'fract_samples {fract_samples:.3f}, DF {DF:.3f}'
                  .format(t=trial, lentb=len(tb), thresh=threshold,
                          fract_time=fract_time, fract_samples=fract_samples,
                          DF=duty_factor))
        if fract_time > 0.95 and fract_samples / fract_time > duty_factor:
            threshold *= 1.2
            continue
        shortest = 0.2 * 1e9 / tsamplen  # want to keep pulses of 0.2 sec even if on a 20 sec stream
        if fract_time < min(0.05, shortest) or fract_samples / fract_time < duty_factor:
            threshold *= 0.9
            continue
        break
    else:  # went through the whole loop (i.e. without success)
        pyfusion.utils.warn('Too few/many points above threshold on shot {shot}'
                            .format(shot=str(shot)))
        if debug > 1:
            plt.figure()
            data.plot_signals()
            plt.show()
        return None

    timesplus = np.array([times[0] - margin[0] * 1e9, times[1] + margin[1] * 1e9],
                         dtype=np.int64)
    print('{sh}: shot length={dt}, {timesplus}'
          .format(sh=shot, dt=np.diff(times) / 1e9, timesplus=(timesplus - tb[0]) / 1e9))
    if debug > 0:
        plt.figure()  # need a new fig whilever we plot in absolute times
        data.plot_signals()
        plt.plot(timesplus, [threshold, threshold], 'o--r')
        plt.plot(timesplus, [-threshold, -threshold], 'o--r')
        plt.xlim(2 * timesplus - times)  # double margin for plot
        plt.show()
    fact = 1
    if secs_rel_t1:
        utc_0 = data.params.get('utc_0', 0)
        if utc_0 == 0:
            raise LookupError("no t1 value, so can't select secs_rel_t1")
        else:
            fact = 1e-9
    else:
        utc_0 = 0
    print(fact, utc_0)
    return (timesplus - utc_0) * fact
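# Hypothetical call: W7-X shots are (date, shot) tuples; this needs a pyfusion
# installation configured for the W7-X archive.
times_utc = find_shot_times(shot=(20171025, 30), diag='W7X_UTDU_LP10_I', debug=1)
if times_utc is not None:
    print('shot window (utc ns):', times_utc)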