def read_file(self, fn=None, header=0, g_inx=5, d_inx=6):
    '''
    This method reads a csv file.

    PARAMETERS:
    fn: string - The name of the file to be read, with path
    header: int - Number of header rows. 0 if no header, default.
    '''
    if fn is None:
        wr.warn('Filename could not be found.')
    else:
        if header == 0:
            self.dat = pd.read_csv(fn, header=None)
        else:
            self.dat = pd.read_csv(fn, skiprows=range(header), header=None)
        # Select the global_list and diffuse_list columns
        self.global_list = self.dat.iloc[:, g_inx].values
        self.diffuse_list = self.dat.iloc[:, d_inx].values
def _unpack_args(self, num):
    warnings.warn('Deprecated and will be removed', DeprecationWarning)
    nargs = self.args
    if len(nargs) != num:
        raise TypeError('{} directive expected exactly {} argument(s), '
                        'got {}'.format(self.name, num, nargs))
    return self.args
def feat_from_raw(raw):
    # see features.py
    sampwidth = 2
    nchannels = 1
    nframes = len(raw) // sampwidth
    out = struct.unpack_from("%dh" % (nframes * nchannels), raw)
    sig = np.reshape(np.array(out), (-1, nchannels)).squeeze()
    sig = sig.astype(np.float32)
    shp = sig.shape
    # wav should contain a single channel
    assert len(shp) == 1 or (len(shp) == 2 and shp[1] == 1)
    sig *= (2**(15 - sampwidth))

    with warnings.catch_warnings():
        # ignore warnings resulting from empty signal parts
        warnings.filterwarnings('ignore',
                                message='divide by zero encountered in log',
                                category=RuntimeWarning, module='sidekit')
        _, loge, _, mspec = mfcc(sig.astype(np.float32), get_mspec=True)

    # Management of short-duration segments
    difflen = 0
    if len(loge) < 68:
        difflen = 68 - len(loge)
        warnings.warn(
            "media %s duration is short. Robust results require length of at least 720 milliseconds" % wavname)
        mspec = np.concatenate((mspec, np.ones((difflen, 24)) * np.min(mspec)))
        # loge = np.concatenate((loge, np.ones(difflen) * np.min(mspec)))

    return mspec, loge, difflen
def append(self, tensor, check_unique=False):
    """
    Insert a new tensor at the end of the index.

    Be advised that this operation is linear in index size ($O(n)$).

    Parameters
    ----------
    tensor: numpy.ndarray or list
        A vector to insert into the index.
    check_unique (optional, default: False): bool
        Defines if the append method should verify the existence of a
        really similar tensor in the current index. In other words, it
        checks for the uniqueness of the value. Be advised that this
        check creates an overhead on the append process.
    """
    if sum(tensor) == 0.:
        raise NullTensorError

    if self._is_new_index:
        index_it = True

        if check_unique and len(self) > 1:
            self.tree.build(self.size << intmul >> self.trees)

            result = self.item(self.index(tensor), top=1, distances=True)

            if result[1][0] <= .05:
                warning(
                    'Tensor being indexed already exists in '
                    'the database and the check for duplicates '
                    'is on. Refusing to store this tensor again.'
                )
                index_it = False

            self.tree.unbuild()

        if index_it:
            self.tree.add_item(len(self), tensor)
    else:
        with Index(self.size, volatile=True, trees=self.trees) as tmp_idx:
            for value in self.values():
                tmp_idx.append(value, check_unique)

            tmp_idx.append(tensor, check_unique)

            _temp_file = tmp_idx.path

        move(_temp_file, self.path)
        self.refresh()
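# A self-contained sketch of the duplicate-check behaviour the docstring above
# describes, using a brute-force nearest-neighbour distance in place of the
# method's tree index; the helper name, the 0.05 threshold, and the list-based
# store are illustrative assumptions, not part of the original class.
import warnings

import numpy as np


def append_if_unique(store, tensor, threshold=0.05):
    """Refuse to append a vector whose nearest neighbour in `store` is
    closer than `threshold` (brute force, standing in for the tree lookup)."""
    tensor = np.asarray(tensor, dtype=float)
    if store:
        dists = np.linalg.norm(np.vstack(store) - tensor, axis=1)
        if dists.min() <= threshold:
            warnings.warn('Tensor already present; refusing to store it again.')
            return False
    store.append(tensor)
    return True


vectors = []
append_if_unique(vectors, [1.0, 2.0, 3.0])   # stored
append_if_unique(vectors, [1.0, 2.0, 3.0])   # warns, not stored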
def set_block(self, row, col, value):
    assert row >= 0 and col >= 0, 'Indices must be positive'
    assert row < self.bshape[0] and col < self.bshape[1], \
        'Indices out of range'
    if value is None:
        self._blocks[row, col] = None
        self._block_mask[row, col] = False
    else:
        if isinstance(value, BaseBlockMatrix):
            assert_block_structure(value)
        elif isinstance(value, np.ndarray):
            if value.ndim != 2:
                msg = 'blocks need to be sparse matrices or BlockMatrices'
                raise ValueError(msg)
            msg = ('blocks need to be sparse matrices or BlockMatrices; '
                   'a numpy array was given; copying the numpy array to a coo_matrix')
            logger.warning(msg)
            warnings.warn(msg)
            value = coo_matrix(value)
        else:
            assert isspmatrix(value), \
                'blocks need to be sparse matrices or BlockMatrices'

        nrows, ncols = value.shape
        self.set_row_size(row, nrows)
        self.set_col_size(col, ncols)
        self._blocks[row, col] = value
        self._block_mask[row, col] = True
def fixed_get(self, key):
    import os
    import errno
    import warnings
    from webassets.cache import make_md5

    if not os.path.exists(self.directory):
        error_logger.warning("Cache directory {} doesn't exist, not going "
                             "to attempt to read cache file".format(self.directory))
        return None

    try:
        hash = make_md5(self.V, key)
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        return None

    filename = os.path.join(self.directory, '%s' % hash)
    try:
        f = io.open(filename, 'rb')
    except IOError as e:
        if e.errno != errno.ENOENT:
            error_logger.exception(
                "Got an exception while trying to open webasset file {}".format(filename))
        return None

    try:
        result = f.read()
    finally:
        f.close()

    unpickled = webassets.cache.safe_unpickle(result)
    if unpickled is None:
        warnings.warn('Ignoring corrupted cache file %s' % filename)
    return unpickled
def get_prepped_inputs(self, chunking, array_shuffle_seed=None, **kwargs): # pylint: disable=too-many-locals sup = super(BaseClassSubsamplingSteppedInputsProvider, self) inputs = sup.get_prepped_data_label(chunking, **kwargs) # shuffle seeds for order of steps, and shuffling keeps if array_shuffle_seed is None: kseed, seed = None, None elif isinstance(array_shuffle_seed, (int, np.int_)): nr.seed(array_shuffle_seed) kseed, seed = nr.randint(41184535, size=2) keeps = self.keeping_decision(inputs, keep_seed=kseed, **kwargs) if keeps.shape[0] < 1: warning("Sub-sampling has resulted in zero-length selection, " "ratios used: {} from given {}".format( self.ratios, self._user_ratios)) for _ in range(self.steps_per_chunk): # zero-length, but we honor the steps per chunk yield [i[keeps, ...] for i in inputs] starts, ends, aseed = self.se_for_chunksteps_maybeshuffled( len(keeps), shuffle_seed=seed, **kwargs) keeps = self.maybe_shuffle_array(keeps, aseed) for s, e in zip(starts, ends): yield [i[keeps[s:e], ...] for i in inputs]
def se_for_chunksteps_maybeshuffled(self, len_input, shuffle_seed=None, **kwargs): # pylint: disable=unused-argument # don't want to kill the training man if len_input < self.steps_per_chunk: warning("chunk size is smaller than steps_per_chunk: " "{} v/s {}, will use the smaller value".format( len_input, self.steps_per_chunk)) spc = len_input else: spc = self.steps_per_chunk nsteps = len_input // spc starts = tuple(range(0, nsteps * spc, nsteps)) ends = starts[1:] + (len_input, ) # NOTE: I know that we can get pre-shuffled data from sup.get_prepped_inputs, # but ... This is to test an API, because ... there will be other implementations # that cannot shuffle the data as is ... like the ones that use striding tricks # shuffle seeds for order of steps, and shuffling data if shuffle_seed is None: oseed, aseed = None, None elif isinstance(shuffle_seed, (int, np.int_)): nr.seed(shuffle_seed) oseed, aseed = nr.randint(41184535, size=2) starts = self.maybe_shuffle_array(np.array(starts), oseed) ends = self.maybe_shuffle_array(np.array(ends), oseed) return starts, ends, aseed
def strip_html(html, include_metatags=True):
    try:
        html = recursively_decode_html_entities(html)
    except:
        e = sys.exc_info()[0]
        logging.warning(
            'Exception during recursively_decode_html_entities: %s', e)
    try:
        soup = bs4.BeautifulSoup(html, 'lxml')
    except:
        warnings.warn('lxml not found; unable to strip HTML.')
        return None
    # Remove javascript.
    [s.extract() for s in soup('script')]
    # Remove css.
    [s.extract() for s in soup('style')]
    content = []
    # First, extract meta tags.
    if include_metatags:
        content.extend(
            meta['content'] for meta in soup('meta') if 'content' in meta)
    # Add text content from the page.
    content.append(soup.get_text(' ', strip=True))
    return ' '.join(content)
def _maybe_promote_st(dtype):
    """
    Modified version of _maybe_promote found in xarray.
    This adds the ability to provide null values for ints and the 'S1' datatype.
    """
    # N.B. these casting rules should match pandas
    if np.issubdtype(dtype, float):
        fill_value = np.nan
    elif np.issubdtype(dtype, int):
        # dtype = int
        fill_value = 0
    elif np.issubdtype(dtype, complex):
        fill_value = np.nan + np.nan * 1j
    elif np.issubdtype(dtype, np.datetime64):
        fill_value = np.datetime64('NaT')
    elif np.issubdtype(dtype, np.timedelta64):
        fill_value = np.timedelta64('NaT')
    elif np.issubdtype(dtype, 'S'):
        fill_value = ''  # fill with empty strings
    else:
        warnings.warn('CHECK THIS DATATYPE: ' + str(dtype))
        dtype = object
        fill_value = np.nan
    return np.dtype(dtype), fill_value
def strip_html(html, include_metatags=True):
    assert isinstance(html, str)
    try:
        html = recursively_decode_html_entities(html)
    except:
        logging.warning(
            'Exception during recursively_decode_html_entities: %s',
            sys.exc_info()[:2])
    try:
        soup = bs4.BeautifulSoup(html, 'lxml')
    except:
        warnings.warn('lxml not found; unable to strip HTML.')
        return None
    # Remove javascript.
    [s.extract() for s in soup('script')]
    # Remove css.
    [s.extract() for s in soup('style')]
    content = []
    # First, extract meta tags.
    if include_metatags:
        content.extend(meta['content'] for meta in soup('meta')
                       if 'content' in meta)
    # Add text content from the page.
    content.append(soup.get_text(' ', strip=True))
    return ' '.join(content)
def init_downsampling():
    global _ALREADY_APPLIED
    if _ALREADY_APPLIED:
        warnings.warn("Filtering to verify robustness should be applied "
                      "only once (init_downsampling was called multiple "
                      "times). This call has no effect.")
        return
    _ALREADY_APPLIED = True

    _draw_allowed_municipalities()
    _apply_downsampling_hooks()

    file_spec_part = "_robustness_s_%d_p_%f" % (ROBUSTNESS_SEED,
                                                ROBUSTNESS_PERCENTAGE)
    valid_in_num = "0123456789eE-abcdefghijklmnopqrstuvwxyz"
    file_spec_part = "".join(
        [k if k in valid_in_num else "_" for k in file_spec_part])
    add_output_dir_postfix(file_spec_part)

    if is_quiet():
        print("Downsampled to %d municipalities" % len(_ALLOWED_MUNICIPALITIES))
    else:
        print(
            "Allowed municipalities (%d): %s"
            % (len(_ALLOWED_MUNICIPALITIES), ",".join(_ALLOWED_MUNICIPALITIES)))
def _reassociate(node, parent):
    if isinstance(node, (Symbol, Div)):
        return
    elif isinstance(node, Par):
        _reassociate(node.child, node)
    elif isinstance(node, (Sum, Sub, FunCall)):
        for n in node.children:
            _reassociate(n, node)
    elif isinstance(node, Prod):
        children = explore_operator(node)
        # Reassociate symbols
        symbols = [n for n, p in children if isinstance(n, Symbol)]
        # Capture the other children and recur on them
        other_nodes = [(n, p) for n, p in children if not isinstance(n, Symbol)]
        for n, p in other_nodes:
            _reassociate(n, p)
        # Create the reassociated product and modify the original AST
        children = zip(*other_nodes)[0] if other_nodes else ()
        children += tuple(sorted(symbols, key=reorder))
        reassociated_node = ast_make_expr(Prod, children, balance=False)
        parent.children[parent.children.index(node)] = reassociated_node
    else:
        warning('Unexpected node of type %s while reassociating', typ(node))
def test_oidc_config_fields(app, client):
    """
    Test that the configuration response at least contains the required
    fields. For fields which are recommended but not required, issue a
    warning.
    """
    response = client.get("/.well-known/openid-configuration")
    assert response.status_code == 200, response.data
    # Check for required fields.
    required_fields = [
        "issuer",
        "authorization_endpoint",
        "token_endpoint",
        "jwks_uri",
        "response_types_supported",
        "subject_types_supported",
        "id_token_signing_alg_values_supported",
    ]
    for field in required_fields:
        assert field in response.json
    # For recommended fields, warn if not contained in the response.
    recommended_fields = [
        "userinfo_endpoint",
        "registration_endpoint",
        "scopes_supported",
        "claims_supported",
    ]
    for field in recommended_fields:
        if field not in response.json:
            warnings.warn(
                "OIDC configuration response missing recommended field: "
                + field)
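# A minimal, hypothetical sketch of how a warning like the one above can be
# asserted in a test, assuming pytest is available; the helper and test names
# below are invented for illustration and are not part of the original suite.
import warnings

import pytest


def emit_missing_field_warning(config, field):
    # Mirrors the test above: warn (rather than fail) on a missing recommended field.
    if field not in config:
        warnings.warn("OIDC configuration response missing recommended field: " + field)


def test_warns_on_missing_recommended_field():
    with pytest.warns(UserWarning, match="recommended field"):
        emit_missing_field_warning({"issuer": "https://example.com"}, "scopes_supported")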
def zinterp_jnu(self, zval, use_nearest=False):
    """Interpolate the Jnu grid at a given redshift

    Parameters
    ----------
    zval : float
        Redshift
    use_nearest : bool, optional
        Use the nearest redshift instead?
    """
    # Do not interpolate beyond limits
    minz = np.min(self.z)
    maxz = np.max(self.z)
    if zval < minz:
        warn.warn('Input z was lower than z grid')
        print('Using z={:g}'.format(minz))
        return self.Jnu[:, 0].flatten()
    if zval > maxz:
        warn.warn('Input z was larger than z grid')
        print('Using z={:g}'.format(maxz))
        return self.Jnu[:, -1].flatten()

    # Find nearest?
    if use_nearest:
        idx = np.argmin(np.abs(self.z - zval))
        return self.Jnu[:, idx].flatten()

    # Interpolate
    nval = self.energy.shape[0]
    jnu = np.zeros(nval)
    for ii in range(nval):
        jnu[ii] = interp1d(self.z, self.Jnu[ii, ])(zval)
    return jnu * self.Jnu.unit
def compute_fiducial(wcslist, bounding_box=None, domain=None):
    """
    For a celestial footprint this is the center.
    For a spectral footprint, it is the beginning of the range.
    This function assumes all WCSs have the same output coordinate frame.
    """
    if domain is not None:
        warnings.warn("'domain' was deprecated in 0.8 and will be removed "
                      "from the next version. Use 'bounding_box' instead.")
    axes_types = wcslist[0].output_frame.axes_type
    spatial_axes = np.array(axes_types) == 'SPATIAL'
    spectral_axes = np.array(axes_types) == 'SPECTRAL'
    footprints = np.hstack(
        [w.footprint(bounding_box=bounding_box).T for w in wcslist])
    spatial_footprint = footprints[spatial_axes]
    spectral_footprint = footprints[spectral_axes]

    fiducial = np.empty(len(axes_types))
    if spatial_footprint.any():
        lon, lat = spatial_footprint
        lon, lat = np.deg2rad(lon), np.deg2rad(lat)
        x_mean = np.mean(np.cos(lat) * np.cos(lon))
        y_mean = np.mean(np.cos(lat) * np.sin(lon))
        z_mean = np.mean(np.sin(lat))
        lon_fiducial = np.rad2deg(np.arctan2(y_mean, x_mean)) % 360.0
        lat_fiducial = np.rad2deg(
            np.arctan2(z_mean, np.sqrt(x_mean**2 + y_mean**2)))
        fiducial[spatial_axes] = lon_fiducial, lat_fiducial
    if spectral_footprint.any():
        fiducial[spectral_axes] = spectral_footprint.min()
    return fiducial
def __init__(self, n_workers=None, threads_per_worker=None, processes=True, loop=None, start=True, ip=None, scheduler_port=0, silence_logs=logging.CRITICAL, diagnostics_port=8787, services={}, worker_services={}, nanny=None, **worker_kwargs): if nanny is not None: warnings.warning("nanny has been deprecated, used processes=") processes = nanny self.status = None self.processes = processes self.silence_logs = silence_logs if silence_logs: for l in ['distributed.scheduler', 'distributed.worker', 'distributed.core', 'distributed.nanny']: logging.getLogger(l).setLevel(silence_logs) if n_workers is None and threads_per_worker is None: if processes: n_workers = _ncores threads_per_worker = 1 else: n_workers = 1 threads_per_worker = _ncores if n_workers is None and threads_per_worker is not None: n_workers = max(1, _ncores // threads_per_worker) if n_workers and threads_per_worker is None: # Overcommit threads per worker, rather than undercommit threads_per_worker = max(1, int(math.ceil(_ncores / n_workers))) self.loop = loop or IOLoop() if start and not self.loop._running: self._thread = Thread(target=self.loop.start) self._thread.daemon = True self._thread.start() while not self.loop._running: sleep(0.001) if diagnostics_port is not None: try: from distributed.bokeh.scheduler import BokehScheduler except ImportError: logger.debug("To start diagnostics web server please install Bokeh") else: services[('bokeh', diagnostics_port)] = BokehScheduler self.scheduler = Scheduler(loop=self.loop, services=services) self.scheduler_port = scheduler_port self.workers = [] self.n_workers = n_workers self.threads_per_worker = threads_per_worker self.worker_services = worker_services self.worker_kwargs = worker_kwargs if start: sync(self.loop, self._start, ip) clusters_to_close.add(self)
def memory(self):
    """Return the function's memory attribute."""
    try:
        return self.function.memory
    except:
        warnings.warn(
            f'Function of {self.name} ({self.function.name}) has no memory attribute'
        )
def __setitem__(self, name, value):
    v = self.__check(value, name)
    if v is not None:
        if name in self.__data:
            warning(
                f"Overwriting field \"{name}\" which is already present in entry"
            )
        self.__data[name] = v
def __getField(self, bibItem, field, required=False):
    if field in bibItem:
        return field
    if field in self.fieldAliases:
        for alias in self.fieldAliases[field]:
            if alias in bibItem:
                return alias
    if required:
        warning(f"Could not find required field: {field}")
def plot_3d_field_line(magnetic_field, xpos, zpos, yperiod, cycles=20, y_res=50):
    """Make a 3D plot of field lines

    Inputs
    ------
    magnetic_field - Magnetic field object
    xpos           - Starting X location. Can be scalar or list/array
    zpos           - Starting Z location. Can be scalar or list/array
    yperiod        - Length of period in y domain
    cycles         - Number of times to go round in y [20]
    y_res          - Number of points in y in each cycle [50]
    """
    if not plotting_available:
        warnings.warn("matplotlib not available, unable to plot")
        return

    yperiod = float(yperiod)

    # Go round toroidally cycles times
    phivals_hires = np.linspace(0, cycles * yperiod,
                                num=y_res * cycles, endpoint=False)

    xpos = np.asfarray(xpos)
    zpos = np.asfarray(zpos)

    field_tracer = fieldtracer.FieldTracer(magnetic_field)
    result_hires = field_tracer.follow_field_lines(xpos, zpos, phivals_hires)

    # Get phivals_hires into [0, yperiod]
    phivals_hires_mod = np.remainder(phivals_hires, yperiod)
    # There are cycles sets of field lines y_res points long each
    # and we also need to transpose for reasons
    phivals_hires_mod = phivals_hires_mod.reshape((cycles, y_res)).T
    # Same for the result, but only swap first and second indices
    result_hires_mod = result_hires.reshape((cycles, y_res, 2)).transpose(1, 0, 2)

    fig = plt.figure()
    ax = fig.gca(projection='3d')

    for n in range(cycles):
        ax.plot(result_hires_mod[:, n, 0], result_hires_mod[:, n, 1],
                phivals_hires_mod[:, n])

    plt.show()

    return fig, ax
def extract(self, **kwargs):
    """Return a dictionary of hoistable subexpressions."""
    if not self._check_loops(self.expr_info.loops):
        warning("Loop nest unsuitable for generalized licm. Skipping.")
        return

    symbols = visit(self.expr_info.outermost_parent)['symbols_dep']
    symbols = dict((s, [l.dim for l in dep]) for s, dep in symbols.items())

    return self._extract(self.stmt.rvalue, symbols, **kwargs)
def golist_to_collapsed_gene_list(self, go_list):
    gene_set = set()
    for go_term in go_list:
        if go_term in self.go_to_gene_dict:
            gene_set.update(self.go_to_gene_dict[go_term])
        else:
            warnings.warn("Warning: GO Term " + go_term +
                          " not found in the GO-to-gene dictionary.")
    return list(gene_set)
def __init__(self, size=CACHE_LINE_SIZE - HEADER_SIZE, **kwargs):
    sz = size + self.HEADER_SIZE
    if sz & (sz - 1) or sz % CACHE_LINE_SIZE:
        warnings.warn(
            "Size of state counter should be a multiple of {} or a smaller "
            "power of two sans header size ({}); the perfect size is {}".format(
                CACHE_LINE_SIZE, self.HEADER_SIZE,
                CACHE_LINE_SIZE - self.HEADER_SIZE))
    self.size = size
    super().__init__(**kwargs)
def get_name_from_target(target):
    import warnings, inspect
    previous_frame = inspect.currentframe().f_back
    (filename, line_number, function_name, lines, index) = inspect.getframeinfo(previous_frame)
    stack = inspect.stack()[1]
    warnings.warn(
        "deprecated, use 'get_attribute_string(ast_object)' instead. "
        f"Called from {stack[3]} ( {stack[1]}: {stack[2]})",
        DeprecationWarning)
    return get_attribute_string(target)
def _send_ping(self, interval, event):
    while not event.wait(interval):
        self.last_ping_tm = time.time()
        if self.sock:
            try:
                tradeStr = {"uri": "ping"}
                params = json.dumps(tradeStr)
                self.sock.ping(payload=params)
            except Exception as ex:
                warnings.warn("send_ping routine terminated: {}".format(ex))
                break
def recall(y_true, y_pred, argsort_kind='quicksort'): """Computes the Recall values w.r.t. descending `y_pred` values. Parameters ---------- y_true: array, shape = [n_samples] True values, interpreted as strictly positive or not (i.e. converted to binary). Could be in {-1, +1} or {0, 1} or {False, True}. y_pred: array, shape = [n_samples] Predicted values. argsort_kind: str Sorting algorithm. Returns ------- rec: array, shape = [n_samples] Recall array. """ # -- basic checks and conversion assert len(y_true) == len(y_pred) assert np.isfinite(y_true).all() assert np.isfinite(y_pred).all() y_true = np.array(y_true, dtype=DTYPE) assert y_true.ndim == 1 y_pred = np.array(y_pred, dtype=DTYPE) assert y_pred.ndim == 1 n_uniques = np.unique(y_pred) if n_uniques.size == 1: raise ValueError('Rank of predicted values is ill-defined' ' because all elements are equal') elif n_uniques.size < y_pred.size: warning('some predicted elements have exactly the same value.' ' output will most probably depend on the sorting' ' method used. Here "%s"' % argsort_kind) # -- actual computation idx = (-y_pred).argsort(kind=argsort_kind) tp = (y_true[idx] > 0).cumsum(dtype=DTYPE) y_true_n_pos = (y_true > 0).sum(dtype=DTYPE) if y_true_n_pos == 0: rec = np.zeros(tp.shape, dtype=DTYPE) else: rec = tp / y_true_n_pos return rec
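# A short worked example of the computation the docstring above describes:
# sort by descending prediction, accumulate true positives, and divide by the
# total positive count. Plain NumPy is used here rather than the function
# itself, so the snippet is self-contained.
import numpy as np

y_true = np.array([1, 0, 1, 1, 0], dtype=float)
y_pred = np.array([0.9, 0.8, 0.7, 0.2, 0.1], dtype=float)

order = (-y_pred).argsort(kind='quicksort')
tp = (y_true[order] > 0).cumsum(dtype=float)
rec = tp / (y_true > 0).sum(dtype=float)

print(rec)  # [0.333..., 0.333..., 0.666..., 1.0, 1.0]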
def execute_async(self, key, command, queue=None):
    if queue is not None:
        warnings.warn(
            'DaskExecutor does not support queues. All tasks will be run '
            'in the same cluster')

    def airflow_run():
        return subprocess.check_call(command, shell=True)

    future = self.client.submit(airflow_run, pure=False)
    self.futures[future] = key
def addto_hdr(paramname, value):
    if header_params[paramname] == 'd':
        return prep_double(paramname, value)
    elif header_params[paramname] == 'i':
        return prep_int(paramname, value)
    elif header_params[paramname] == 'str':
        return prep_string(paramname) + prep_string(value)
    elif header_params[paramname] == 'flag':
        return prep_string(paramname)
    else:
        warnings.warn("key '%s' is unknown!" % paramname)
        return hdr
def parseFile(self, bibFile): """ parseFile(bibFile) Parses the given ``*.bib`` file for entries and loads it in underlying data. :param bibFile: The :class:`file` instance to be parsed. """ OUTSIDE = 0 ENTRY_TYPE = 1 COMMENT = 2 self.__line = 0 self.__column = 0 mode = OUTSIDE c = '\n' while True: if c == '\n': self.__line += 1 self.__column = 0 c = bibFile.read(1) self.__column += 1 if not c: self.__line = 0 self.__column = 0 return if mode == OUTSIDE: if not c.strip(): continue elif c == '@': mode = ENTRY_TYPE entryType = '' elif c == '%': prevMode = mode mode = COMMENT else: warning(f"Omitted unexpected charater: \"{c}\"") elif mode == COMMENT: if c == '\n': mode = prevMode elif mode == ENTRY_TYPE: if c == '{': try: self.__iadd__(self.parseEntry(bibFile, entryType.strip().lower())) except (ValueError): warning(f"Encountered unsupported bibTeX entry: {entryType.strip().lower()}") mode = OUTSIDE elif c == '%': prevMode = mode mode = COMMENT else: entryType += c
def new_func(*args, **kwargs):
    warnings.simplefilter('always', DeprecationWarning)  # turn off filter
    warnings.warn(
        'Use {0} instead of {1}, {1} will be removed in the future.'.format(
            new_name, func.__name__),
        category=DeprecationWarning,
        stacklevel=2,
    )
    warnings.simplefilter('default', DeprecationWarning)  # reset filter
    return func(*args, **kwargs)
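# A hedged sketch of how an inner wrapper like new_func above is typically
# wired into a complete deprecation decorator; the decorator name
# `deprecated_alias` and the example function are assumptions for
# illustration, not taken from the source.
import functools
import warnings


def deprecated_alias(new_name):
    """Warn that the decorated function is deprecated in favour of `new_name`."""
    def decorator(func):
        @functools.wraps(func)
        def new_func(*args, **kwargs):
            warnings.simplefilter('always', DeprecationWarning)  # make the warning visible
            warnings.warn(
                'Use {0} instead of {1}, {1} will be removed in the future.'.format(
                    new_name, func.__name__),
                category=DeprecationWarning,
                stacklevel=2,
            )
            warnings.simplefilter('default', DeprecationWarning)  # reset filter
            return func(*args, **kwargs)
        return new_func
    return decorator


@deprecated_alias('start_fit')
def startfit():
    pass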
def isfullhouse(cards):
    cardnums = [card[:-1] for card in cards]
    c = Counter(cardnums)
    # print("C is infullhouse", c)
    highest = c.most_common(2)[0]
    try:
        second = c.most_common(2)[1]
    except IndexError:
        warnings.warn("Got Five of a Kind or Something")
        return False
    if highest[1] == 3 and second[1] == 2:
        return True
    else:
        return False
def set_fields(self, ra=None, dec=None, **kwargs):
    """ """
    kwargs["width"] = kwargs.get("width", self.width)
    kwargs["height"] = kwargs.get("height", self.height)

    self._side_properties["fields"] = SurveyFieldBins(ra, dec, **kwargs)

    if self.cadence is not None and np.any(np.isnan(self.cadence['field'])):
        warnings.warn("cadence was already set, field pointing will be updated")
        self._update_field_radec()
def plot_streamlines(grid, magnetic_field, y_slice=0, width=None, **kwargs):
    """Plot streamlines of the magnetic field in the poloidal plane

    Parameters
    ----------
    grid : :py:obj:`zoidberg.grid.Grid`
        Grid generated by Zoidberg
    magnetic_field : :py:obj:`zoidberg.field.MagneticField`
        Zoidberg magnetic field object
    y_slice : int, optional
        y-index to plot streamlines at
    width : float, optional
        If not None, line widths are proportional to the magnitude of
        the `magnetic_field` times `width`

    Returns
    -------
    fig, ax
        The matplotlib figure and axis used
    """
    if not plotting_available:
        warnings.warn("matplotlib not available, unable to plot")
        return

    fig, ax = plt.subplots(1, 1)

    full_slice = np.s_[:, y_slice, :]
    if width is not None:
        # Get the B field magnitude in the poloidal plane
        bxz_mag = np.sqrt(magnetic_field.b_mag**2 - magnetic_field.by**2)
        linewidth = width * (bxz_mag[full_slice] / bxz_mag.max()).T
    else:
        linewidth = 1

    ax.streamplot(grid.xarray, grid.zarray,
                  magnetic_field.bx[full_slice].T,
                  magnetic_field.bz[full_slice].T,
                  linewidth=linewidth,
                  **kwargs)

    ax.set_xlabel("Radius [m]", fontsize=20)
    ax.set_ylabel("Height [m]", fontsize=20)
    ax.tick_params(axis='both', labelsize=15)

    plt.show()

    return fig, ax
def check_arguments(args):
    '''
    Checks that train or test mode (or both) was requested.
    :param args.train
    :param args.test
    '''
    parser = argparse.ArgumentParser()
    if args.cluster:
        warnings.warn('Cluster module is still not fully functional')
    if not (args.train or args.test):
        parser.error('No action requested, add --train or --test')
    if args.test and not args.test_input:
        parser.error("If testing, must specify test data, use -t/--test_input "
                     "<<DATAFILE>>")
def _recoil(self): """Increase the stack size if the kernel arrays exceed the stack limit threshold (at the C level).""" # Assume the size of a C type double is 8 bytes c_double_size = 8 # Assume the stack size is 1.7 MB (2 MB is usually the limit) stack_size = 1.7*1024*1024 decls = [d for d in self.decls.values() if d.sym.rank] size = sum([reduce(operator.mul, d.sym.rank) for d in decls]) if size * c_double_size > stack_size: # Increase the stack size if the kernel's stack size seems to outreach # the space available try: resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY)) except resource.error: warning("Stack may blow up, and could not increase its size.")
def expand(self, mode='standard', **kwargs): """Expand expressions over other expressions based on different heuristics. In the simplest example one can have: :: (X[i] + Y[j])*F + ... which could be transformed into: :: (X[i]*F + Y[j]*F) + ... When creating the expanded object, if the expanding term had already been hoisted, then the expansion itself is also lifted. For example, if: :: Y[j] = f(...) (X[i]*Y[j])*F + ... and we assume it has been decided (see below) the expansion should occur along the loop dimension ``j``, the transformation generates: :: Y[j] = f(...)*F (X[i]*Y[j]) + ... One may want to expand expressions for several reasons, which include * Exposing factorization opportunities; * Exposing high-level (linear algebra) operations (e.g., matrix multiplies) * Relieving register pressure; when, for example, ``(X[i]*Y[j])`` is computed in a loop L' different than the loop L'' in which ``Y[j]`` is evaluated, and ``cost(L') > cost(L'')``; :param mode: multiple expansion strategies are possible, each exposing different, "hidden" opportunities for later code motion. * mode == 'standard': this heuristics consists of expanding along the loop dimension appearing the most in different (i.e., unique). This aims at making factorization more effective. * mode == 'all': expand when symbols depend on at least one of the expression's dimensions * mode == 'domain': expand when symbols depending on the expressions's domain are encountered. * mode == 'outdomain': expand when symbols independent of the expression's domain are encountered. """ if mode == 'standard': retval = FindInstances.default_retval() symbols = FindInstances(Symbol).visit(self.stmt.rvalue, ret=retval)[Symbol] # The heuristics privileges domain dimensions dims = self.expr_info.out_domain_dims if not dims or self.expr_info.dimension >= 2: dims = self.expr_info.domain_dims # Get the dimension occurring most often occurrences = [tuple(r for r in s.rank if r in dims) for s in symbols] occurrences = [i for i in occurrences if i] if not occurrences: return self # Finally, establish the expansion dimension dimension = Counter(occurrences).most_common(1)[0][0] should_expand = lambda n: set(dimension).issubset(set(n.rank)) elif mode in ['all', 'domain', 'outdomain']: info = visit(self.expr_info.outermost_loop, info_items=['symbols_dep']) symbols = defaultdict(set) for s, dep in info['symbols_dep'].items(): symbols[s.symbol] |= {l.dim for l in dep} if mode == 'all': should_expand = lambda n: symbols.get(n.symbol) and \ any(r in self.expr_info.dims for r in symbols[n.symbol]) elif mode == 'domain': should_expand = lambda n: symbols.get(n.symbol) and \ any(r in self.expr_info.domain_dims for r in symbols[n.symbol]) elif mode == 'outdomain': should_expand = lambda n: symbols.get(n.symbol) and \ not symbols[n.symbol].issubset(set(self.expr_info.domain_dims)) else: warning('Unknown expansion strategy. Skipping.') return # Perform the expansion self.expr_expander.expand(should_expand, kwargs.get('not_aggregate')) # Update known declarations self.decls.update(self.expr_expander.expanded_decls) return self
def startfit(self):
    warnings.warn("Method renamed to startFit", DeprecationWarning)
    self.startFit()
def cv_timeresolved(spiketrain, win=None, start=None, stop=None, step=None): """ Evaluate the empirical coefficient of variation (CV) of the inter-spike intervals (ISIs) of one spike train (or a list of spike trains). By default computes the CV over the full time span of the data. However, it can compute the CV time-resolved as well. Given the vector v containing the observed ISIs of one spike train in the time window [t0, t1], the CV in [t0, t1] is defined as CV := std(v)/mean(v). The CV of a list of spike trains is computed collecting the ISIs of all spike trains. The CV represents a measure of irregularity in the spiking activity. For For a time-stationary Poisson process, the theoretical CV is 1. Arguments --------- spiketrain : SpikeTrain or list of SpikeTrains a neo.SpikeTrain object (or a list of), for which to compute the CV win : Quantity (optional) the length of the time windows over which to compute the CV. If None, the CV is computed over the largest window possible; otherwise, the window slides along time (see argument 'step') Default: None start : Quantity, optional initial time for the computation of the CV. If None, the largest t_start among those of the input spike trains in `spiketrain` is used Default: None stop : Quantity, optional last time for the computation of the CV. If None, the smallest t_stop among those of the input spike trains in `spiketrain` is used Default: None step : Quantity, optional Time shift between two consecutive sliding windows. If None, successive windows are adjacent Default: None Returns ------- values : array Array of CV values computed over consecutive time windows windows : array Array of shape (n, 2) of time windows over which the CV has been computed """ # Convert spiketrain to a list if it is a SpikeTrain if type(spiketrain) == neo.core.SpikeTrain: spiketrain = [spiketrain] max_tstart = min([t.t_start for t in spiketrain]) min_tstop = max([t.t_stop for t in spiketrain]) if not (all([max_tstart == t.t_start for t in spiketrain]) and all([min_tstop == t.t_stop for t in spiketrain])): warnings.warning('spike trains have different t_start or t_stop' ' values. CV computed for inner values only') t_start = max_tstart if start is None else start t_stop = min_tstop if stop is None else stop wlen = t_stop - t_start if win is None else win wstep = wlen if step is None else step # Convert all time quantities in dimensionless (_dl) units (meant in s) start_dl = float(t_start.simplified.base) stop_dl = float(t_stop.simplified.base) wlen_dl = float(wlen.simplified.base) step_dl = float(wstep.simplified.base) # Define the centers of the sliding windows where the CV must be computed cv_times = numpy.arange(wlen_dl / 2. + start_dl, stop_dl - wlen_dl / 2. + step_dl / 2, step_dl) # Define the nx2 array of time windows within which to compute the CV windows = pq.s * numpy.array([numpy.max([cv_times - wlen_dl / 2., start_dl * numpy.ones( len(cv_times))], axis=0), numpy.min([cv_times + wlen_dl / 2., stop_dl * numpy.ones( len(cv_times))], axis=0)]).T # Compute the CV in each window defined above cv_values = numpy.zeros(len(cv_times)) # Initialize CV values to 0 for i, w in enumerate(windows): x_sliced = [t.time_slice(w[0], w[1]) for t in spiketrain] cv_values[i] = cv(x_sliced) return cv_values, windows
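# A minimal illustration of the CV definition given in the docstring above
# (CV := std(v)/mean(v) of the ISIs), using plain NumPy: for an approximately
# Poisson spike train the value should come out near 1, as the docstring
# states. This sidesteps the function's sliding-window machinery and its
# neo/quantities dependencies; the rate and sample size are arbitrary.
import numpy as np

rng = np.random.default_rng(0)
spike_times = np.cumsum(rng.exponential(scale=0.1, size=1000))  # Poisson-like train
isis = np.diff(spike_times)
cv_value = isis.std() / isis.mean()
print(round(cv_value, 2))  # approximately 1.0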
def fit_model(self, tol=1e-3, iter_max=100, h_step=2.0, epsil_0=10, constant=True, verbose=True, missing='drop', **fit_kwargs): ''' ''' # Fit a normal linear model to the data if constant: x_const = sm.add_constant(self.x) else: x_const = self.x if self.weights is None: model = sm.OLS(self.y, x_const, missing=missing) else: model = sm.WLS(self.y, x_const, weights=self.weights, missing=missing) init_lm = model.fit(**fit_kwargs) if verbose: print(init_lm.summary()) epsil = epsil_0 # Before we get into the loop, make sure that this was a bad fit if epsil_0 < tol: warnings.warning('Initial epsilon is smaller than tolerance. \ The tolerance should be set smaller.') return init_lm # Sum of residuals dev_0 = np.sum(init_lm.resid**2.) # Catch cases where a break isn't necessary self.break_fail_flag = False # Count it = 0 # Now loop through and minimize the residuals by changing where the # breaking point is. while np.abs(epsil) > tol: U = (self.x - self.brk) * (self.x > self.brk) V = deriv_max(self.x, self.brk) X_all = np.vstack([self.x, U, V]).T if constant: X_all = sm.add_constant(X_all) if self.weights is None: model = sm.OLS(self.y, X_all, missing=missing) else: model = sm.WLS(self.y, X_all, weights=self.weights, missing=missing) fit = model.fit() beta = fit.params[2] # Get coef gamma = fit.params[3] # Get coef # Adjust the break point new_brk = copy(self.brk) new_brk += (h_step * gamma) / beta # If the new break point is outside of the allowed range, reset # the step size to half of the original, then try stepping again h_it = 0 if not (self.x > new_brk).any() or (self.x > new_brk).all(): while True: # Remove step taken new_brk -= (h_step * gamma) / beta # Now half the step and try again. h_step /= 2.0 new_brk += (h_step * gamma) / beta h_it += 1 if (self.x > new_brk).any() and not (self.x > new_brk).all(): self.brk = new_brk break if h_it >= 5: self.break_fail_flag = True it = iter_max + 1 warnings.warn("Cannot find good step-size, assuming\ break not needed") break else: self.brk = new_brk dev_1 = np.sum(fit.resid**2.) epsil = (dev_1 - dev_0) / (dev_0 + 1e-3) dev_0 = dev_1 if verbose: print("Iteration: %s/%s" % (it + 1, iter_max)) print(fit.summary()) print("Break Point: " + str(self.brk)) print("Epsilon: " + str(epsil)) it += 1 if it > iter_max: warnings.warn("Max iterations reached. \ Result may not be minimized.") break # Is the initial model without a break better? if self.break_fail_flag or np.sum(init_lm.resid**2) <= np.sum(fit.resid**2): # If the initial fit was better, the segmented fit failed. self.break_fail_flag = True self.brk = self.x.max() X_all = sm.add_constant(self.x) else: # With the break point hopefully found, do a final good fit U = (self.x - self.brk) * (self.x > self.brk) V = deriv_max(self.x, self.brk) X_all = np.vstack([self.x, U, V]).T X_all = sm.add_constant(X_all) if self.weights is None: model = sm.OLS(self.y, X_all, missing=missing) else: model = sm.WLS(self.y, X_all, weights=self.weights, missing=missing) self.fit = model.fit() self._params = self.fit.params self._errs = self.fit.bse if not self.break_fail_flag: self.brk_err = brk_errs(self.params, fit.cov_params()) else: self.brk_err = 0.0 self.get_slopes()
def locfdr(zz, bre = 120, df = 7, pct = 0., pct0 = 1./4, nulltype = 1, type = 0, plot = 1, mult = None, mlests = None, main = ' ', sw = 0, verbose = True, showplot = True, saveplot = False, saveroot = 'locfdr', saveext = 'pdf', savestamp = False): """Computes local false discovery rates. This is Abhinav Nellore's Python implementation of the R function locfdr() v1.1.7, originally written by Bradley Efron, Brit B. Turnbull, and Balasubramanian Narasimhan; and later enhanced by Alyssa Frazee, Leonardo Collado-Torres, and Jeffrey Leek (see https://github.com/alyssafrazee/derfinder/blob/master/R/locfdrFit.R ). It is licensed under the GNU GPL v2. See COPYING for more information. The port is relatively faithful. Variable names are almost precisely the same; if the original variable name contained a period, that period is replaced by an underscore here. (So 'Cov2.out' in the R is 'Cov2_out' in the Python.) To access returned values: (in R) --- results = locfdr(...) results$fdr results$z.2 (in Python) --- results = locfdr(...) results['fdr'] results['z_2'] Some returned values are pandas Series and DataFrames. An introduction to pandas data structures is available at http://pandas.pydata.org/pandas-docs/dev/dsintro.html . A nearly complete description of arguments and returned values may be found at http://cran.r-project.org/web/packages/locfdr/vignettes/locfdr-example.pdf . Additional arguments in this version: verbose: (True or False) --- If True, outputs warnings. showplot: (True or False) --- If True, displays plot. Ignored if plot = 0. saveplot: (True or False) --- If True, saves plot according to constraints specified by saveroot, saveext, and savestamp. Ignored if plot = 0. saveroot: (Any string that constitutes a valid filename.) --- Specifies prefix of file to save. Ignored if saveplot = False. saveext: (Most valid image file extensions work here. Try 'png', 'pdf', 'ps', 'eps', or 'svg'.) --- Selects file format and extension. Ignored if saveplot = False. savestamp: (True or False) --- If True, date/timestamp is appended to filename prefix; this helps prevent overwriting old saves. Ignored if saveplot = False. Additional returned values in this version: yt: Heights of pink histogram bars that appear on the plots (i.e., heights of alt. density's histogram). x: Locations of pinkfl histogram bars that appear on the plots (locations of alt. density's histogram). mlest_lo AND mlest_hi: If the function outputs a warning message that reads "please rerun with mlest parameters = ...", these parameters are contained in mlest_lo and mlest_hi . needsfix: 1 if a rerun warning is output; otherwise 0. nulldens: y-values of estimated null distribution density. nulldens: y-values of estimated full (mixture) density.""" call = it.stack() zz = np.array(zz) mlest_lo = None mlest_hi = None yt = None x = None needsfix = 0 try: brelength = len(bre) lo = min(bre) up = max(bre) bre = brelength except TypeError: try: len(pct) lo = pct[0] up = pct[1] # the following line is present to mimic how R handles [if (pct > 0)] (see code below) when pct is an array pct = pct[0] except TypeError: if pct == 0: lo = min(zz) up = max(zz) elif pct < 0: med = np.median(zz) lo = med + (1 - pct) * (min(zz) - med) up = med + (1 - pct) * (max(zz) - med) elif pct > 0: lo = np.percentile(zz, pct * 100) up = np.percentile(zz, (1 - pct) * 100) zzz = np.array([max(min(el, up), lo) for el in zz]) breaks = np.linspace(lo, up, bre) x = (breaks[1:] + breaks[0:-1]) / 2. 
y = np.histogram(zzz, bins = len(breaks) - 1)[0] yall = y K = len(y) N = len(zz) if pct > 0: y[0] = min(y[0], 1.) y[K-1] = min(y[K-1], 1) if not type: basismatrix = rf.ns(x, df) X = np.ones((basismatrix.shape[0], basismatrix.shape[1]+1), dtype=np.float64) X[:, 1:] = basismatrix f = glm("y ~ basismatrix", data = dict(y=np.matrix(y).transpose(), basismatrix=basismatrix), family=families.Poisson()).fit().fittedvalues else: basismatrix = rf.poly(x, df) X = np.ones((basismatrix.shape[0], basismatrix.shape[1]+1), dtype=np.float64) X[:, 1:] = basismatrix f = glm("y ~ basismatrix", data = dict(y=np.matrix(y).transpose(), basismatrix=basismatrix), family=families.Poisson()).fit().fittedvalues fulldens = f l = np.log(f) Fl = f.cumsum() Fr = f[::-1].cumsum() D = ((y - f) / np.sqrt((f + 1))) D = sum(np.power(D[1:(K-1)], 2)) / (K - 2 - df) if D > 1.5: wa.warn("f(z) misfit = " + str(round(D,1)) + ". Rerun with larger df.") if nulltype == 3: fp0 = pd.DataFrame(np.zeros((6,4)).fill(np.nan), index=['thest', 'theSD', 'mlest', 'mleSD', 'cmest', 'cmeSD'], columns=['delta', 'sigleft', 'p0', 'sigright']) else: fp0 = pd.DataFrame(np.zeros((6,3)).fill(np.nan), index=['thest', 'theSD', 'mlest', 'mleSD', 'cmest', 'cmeSD'], columns=['delta', 'sigma', 'p0']) fp0.loc['thest'][0:2] = np.array([0,1]) fp0.loc['theSD'][0:2] = 0 imax = l.argmax() xmax = x[imax] try: len(pct) pctlo = pct0[0] pctup = pct0[1] except TypeError: pctup = 1 - pct0 pctlo = pct0 lo0 = np.percentile(zz, pctlo*100) hi0 = np.percentile(zz, pctup*100) nx = len(x) i0 = np.array([i for i, el in enumerate(x) if el > lo0 and el < hi0]) x0 = np.array([el for el in x if el > lo0 and el < hi0]) y0 = np.array([el for i,el in enumerate(l) if x[i] > lo0 and x[i] < hi0]) xsubtract = x0 - xmax X00 = np.zeros((2, len(xsubtract))) if nulltype == 3: X00[0, :] = np.power(xsubtract, 2) X00[1, :] = [max(el, 0)*max(el, 0) for el in xsubtract] else: X00[0, :] = xsubtract X00[1, :] = np.power(xsubtract, 2) X00 = X00.transpose() co = glm("y0 ~ X00", data = dict(y0=y0, X00=X00)).fit().params # these errors may not be necessary if nulltype == 3 and ((pd.isnull(co[1]) or pd.isnull(co[2])) or (co[1] >= 0 or co[1] + co[2] >= 0)): raise EstimationError('CM estimation failed. Rerun with nulltype = 1 or 2.') elif pd.isnull(co[2]) or co[2] >= 0: if nulltype == 2: raise EstimationError('CM estimation failed. 
Rerun with nulltype = 1.') elif nulltype != 3: xsubtract2 = x - xmax X0 = np.ones((3, len(xsubtract2))) X0[1, :] = xsubtract2 X0[2, :] = np.power(xsubtract2, 2) X0 = X0.transpose() wa.warn('CM estimation failed; middle of histogram nonnormal') else: xsubtract2 = x - xmax X0 = np.ones((3, len(xsubtract2))) if nulltype == 3: X0[1, :] = np.power(xsubtract2, 2) X0[2, :] = [max(el, 0)*max(el, 0) for el in xsubtract2] sigs = np.array([1/np.sqrt(-2*co[1]), 1/np.sqrt(-2*(co[1]+co[2]))]) fp0.loc['cmest'][0] = xmax fp0.loc['cmest'][1] = sigs[0] fp0.loc['cmest'][3] = sigs[1] else: X0[1, :] = xsubtract2 X0[2, :] = np.power(xsubtract2, 2) xmaxx = -co[1] / (2 * co[2]) + xmax sighat = 1 / np.sqrt(-2 * co[2]) fp0.loc['cmest'][[0,1]] = [xmaxx, sighat] X0 = X0.transpose() l0 = np.array((X0 * np.matrix(co).transpose()).transpose())[0] f0 = np.exp(l0) p0 = sum(f0) / float(sum(f)) f0 = f0 / p0 fp0.loc['cmest'][2] = p0 b = 4.3 * np.exp(-0.26 * np.log10(N)) if mlests == None: med = np.median(zz) sc = (np.percentile(zz, 75) - np.percentile(zz, 25)) / (2 * stats.norm.ppf(.75)) mlests = lf.locmle(zz, xlim = np.array([med, b * sc])) if N > 5e05: if verbose: wa.warn('length(zz) > 500,000: an interval wider than the optimal one was used for maximum likelihood estimation. To use the optimal interval, rerun with mlests = [' + str(mlests[0]) + ', ' + str(b * mlests[1]) + '].') mlest_lo = mlests[0] mlest_hi = b * mlests[1] needsfix = 1 mlests = lf.locmle(zz, xlim = [med, sc]) if not pd.isnull(mlests[0]): if N > 5e05: b = 1 if nulltype == 1: Cov_in = {'x' : x, 'X' : X, 'f' : f, 'sw' : sw} ml_out = lf.locmle(zz, xlim = [mlests[0], b * mlests[1]], d = mlests[0], s = mlests[1], Cov_in = Cov_in) mlests = ml_out['mle'] else: mlests = lf.locmle(zz, xlim = [mlests[0], b * mlests[1]], d = mlests[0], s = mlests[1]) fp0.loc['mlest'][0:3] = mlests[0:3] fp0.loc['mleSD'][0:3] = mlests[3:6] if (not (pd.isnull(fp0.loc['mlest'][0]) or pd.isnull(fp0.loc['mlest'][1]) or pd.isnull(fp0.loc['cmest'][0]) or pd.isnull(fp0.loc['cmest'][1]))) and nulltype > 1: if abs(fp0.loc['cmest'][0] - mlests[0]) > 0.05 or abs(np.log(fp0.loc['cmest'][1] / mlests[1])) > 0.05: wa.warn('Discrepancy between central matching and maximum likelihood estimates. Consider rerunning with nulltype = 1.') if pd.isnull(mlests[0]): if nulltype == 1: if pd.isnull(fp0.loc['cmest'][1]): raise EstimationError('CM and ML estimation failed; middle of histogram is nonnormal.') else: raise EstimationError('ML estimation failed. 
Rerun with nulltype = 2.') else: wa.warn('ML estimation failed.') if nulltype < 2: xmaxx = mlests[0] xmax = mlests[0] delhat = mlests[0] sighat = mlests[1] p0 = mlests[2] f0 = np.array([stats.norm.pdf(el, delhat, sighat) for el in x]) f0 = (sum(f) * f0) / sum(f0) fdr = np.array([min(el, 1) for el in (p0 * (f0 / f))]) f00 = np.exp(-np.power(x, 2) / 2) f00 = (f00 * sum(f)) / sum(f00) p0theo = sum(f[i0]) / sum(f00[i0]) fp0.loc['thest'][2] = p0theo fdr0 = np.array([min(el, 1) for el in ((p0theo * f00) / f)]) f0p = p0 * f0 if nulltype == 0: f0p = p0theo * f00 F0l = f0p.cumsum() F0r = f0p[::-1].cumsum() Fdrl = F0l / Fl Fdrr = (F0r / Fr)[::-1] Int = (1 - fdr) * f * (fdr < 0.9) if np.any([x[i] <= xmax and fdr[i] == 1 for i in xrange(len(fdr))]): xxlo = min([el for i,el in enumerate(x) if el <= xmax and fdr[i] == 1]) else: xxlo = xmax if np.any([x[i] >= xmax and fdr[i] == 1 for i in xrange(len(fdr))]): xxhi = max([el for i,el in enumerate(x) if el >= xmax and fdr[i] == 1]) else: xxhi = xmax indextest = [i for i,el in enumerate(x) if el >= xxlo and el <= xxhi] if len(indextest) > 0: fdr[indextest] = 1 indextest = [i for i,el in enumerate(x) if el <= xmax and fdr0[i] == 1] if len(indextest) > 0: xxlo = min(x[indextest]) else: xxlo = xmax indextest = [i for i,el in enumerate(x) if el >= xmax and fdr0[i] == 1] if len(indextest) > 0: xxhi = max(x[indextest]) else: xxhi = xmax indextest = [i for i,el in enumerate(x) if el >= xxlo and el <= xxhi] if len(indextest) > 0: fdr0[indextest] = 1 if nulltype == 1: indextest = [i for i,el in enumerate(x) if el >= mlests[0] - mlests[1] and el <= mlests[0] + mlests[1]] fdr[indextest] = 1 fdr0[indextest] = 1 p1 = sum((1 - fdr) * f) / N p1theo = sum((1 - fdr0) * f) / N fall = f + (yall - y) Efdr = sum((1 - fdr) * fdr * fall) / sum((1 - fdr) * fall) Efdrtheo = sum((1 - fdr0) * fdr0 * fall) / sum((1 - fdr0) * fall) iup = [i for i,el in enumerate(x) if el >= xmax] ido = [i for i,el in enumerate(x) if el <= xmax] Eleft = sum((1 - fdr[ido]) * fdr[ido] * fall[ido]) / sum((1 - fdr[ido]) * fall[ido]) Eleft0 = sum((1 - fdr0[ido]) * fdr0[ido] * fall[ido])/sum((1 - fdr0[ido]) * fall[ido]) Eright = sum((1 - fdr[iup]) * fdr[iup] * fall[iup])/sum((1 - fdr[iup]) * fall[iup]) Eright0 = sum((1 - fdr0[iup]) * fdr0[iup] * fall[iup])/sum((1 - fdr0[iup]) * fall[iup]) Efdr = np.array([Efdr, Eleft, Eright, Efdrtheo, Eleft0, Eright0]) for i,el in enumerate(Efdr): if pd.isnull(el): Efdr[i] = 1 Efdr = pd.Series(Efdr, index=['Efdr', 'Eleft', 'Eright', 'Efdrtheo', 'Eleft0', 'Eright0']) if nulltype == 0: f1 = (1 - fdr0) * fall else: f1 = (1 - fdr) * fall if mult != None: try: mul = np.ones(len(mult) + 1) mul[1:] = mult except TypeError: mul = np.array([1, mult]) EE = np.zeros(len(mul)) for m in xrange(len(EE)): xe = np.sqrt(mul[m]) * x f1e = rf.approx(xe, f1, x, rule = 2, ties = 'mean') f1e = (f1e * sum(f1)) / sum(f1e) f0e = f0 p0e = p0 if nulltype == 0: f0e = f00 p0e = p0theo fdre = (p0e * f0e) / (p0e * f0e + f1e) EE[m] = sum(f1e * fdre) / sum(f1e) EE = EE / EE[0] EE = pd.Series(EE, index=mult) Cov2_out = lf.loccov2(X, X0, i0, f, fp0.loc['cmest'], N) Cov0_out = lf.loccov2(X, np.ones((len(x), 1)), i0, f, fp0.loc['thest'], N) if sw == 3: if nulltype == 0: Ilfdr = Cov0_out['Ilfdr'] elif nulltype == 1: Ilfdr = ml_out['Ilfdr'] elif nulltype == 2: Ilfdr = Cov2_out['Ilfdr'] else: raise InputError('When sw = 3, nulltype must be 0, 1, or 2.') return Ilfdr if nulltype == 0: Cov = Cov0_out['Cov'] elif nulltype == 1: Cov = ml_out['Cov_lfdr'] else: Cov = Cov2_out['Cov'] lfdrse = np.sqrt(np.diag(Cov)) 
fp0.loc['cmeSD'][0:3] = Cov2_out.loc['stdev'][[1,2,0]] if nulltype == 3: fp0.loc['cmeSD'][3] = fp0['cmeSD'][1] fp0.loc['theSD'][2] = Cov0_out['stdev'][0] if sw == 2: if nulltype == 0: pds = fp0.loc['thest'][[2, 0, 1]] stdev = fp0.loc['theSD'][[2, 0, 1]] pds_ = Cov0_out['pds_'].transpose() elif nulltype == 1: pds = fp0.loc['mlest'][[2, 0, 1]] stdev = fp0.loc['mleSD'][[2, 0, 1]] pds_ = ml_out['pds_'].transpose() elif nulltype == 2: pds = fp0.loc['cmest'][[2, 0, 1]] stdev = fp0.loc['cmeSD'][[2, 0, 1]] pds_ = Cov2_out['pds_'].transpose() else: raise InputError('When sw = 2, nulltype must equal 0, 1, or 2.') pds_ = pd.DataFrame(pds_, columns=['p0', 'delhat', 'sighat']) pds = pd.Series(pds, index=['p0', 'delhat', 'sighat']) stdev = pd.Series(stdev, index=['sdp0', 'sddelhat', 'sdsighat']) return pd.Series({'pds': pds, 'x': x, 'f': f, 'pds_' : pds_, 'stdev' : stdev}) p1 = np.arange(0.01, 1, 0.01) cdf1 = np.zeros((2,99)) cdf1[0, :] = p1 if nulltype == 0: fd = fdr0 else: fd = fdr for i in xrange(99): cdf1[1, i] = sum([el for j,el in enumerate(f1) if fd[j] <= p1[i]]) cdf1[1, :] = cdf1[1, :] / cdf1[1, -1] cdf1 = cdf1.transpose() if nulltype != 0: mat = pd.DataFrame(np.vstack((x, fdr, Fdrl, Fdrr, f, f0, f00, fdr0, yall, lfdrse, f1)), index=['x', 'fdr', 'Fdrleft', 'Fdrright', 'f', 'f0', 'f0theo', 'fdrtheo', 'counts', 'lfdrse', 'p1f1']) else: mat = pd.DataFrame(np.vstack((x, fdr, Fdrl, Fdrr, f, f0, f00, fdr0, yall, lfdrse, f1)), index=['x', 'fdr', 'Fdrltheo', 'Fdrrtheo', 'f', 'f0', 'f0theo', 'fdrtheo', 'counts', 'lfdrsetheo', 'p1f1']) z_2 = np.array([np.nan, np.nan]) m = sorted([(i, el) for i, el in enumerate(fd)], key=lambda nn: nn[1])[-1][0] if fd[-1] < 0.2: z_2[1] = rf.approx(fd[m:], x[m:], 0.2, ties = 'mean') if fd[0] < 0.2: z_2[0] = rf.approx(fd[0:m], x[0:m], 0.2, ties = 'mean') if nulltype == 0: nulldens = p0theo * f00 else: nulldens = p0 * f0 yt = np.array([max(el, 0) for el in (yall * (1 - fd))]) # construct plots if plot > 0: try: import matplotlib.pyplot as plt import matplotlib.patches as patches import matplotlib.path as path except ImportError: print 'matplotlib is required for plotting, but it was not found. Rerun with plot = 0 to turn off plots.' print 'locfdr-python was tested on matplotlib 1.3.0.' 
raise fig = plt.figure(figsize=(14, 8)) if plot == 4: histplot = fig.add_subplot(131) fdrFdrplot = fig.add_subplot(132) f1cdfplot = fig.add_subplot(133) elif plot == 2 or plot == 3: histplot = fig.add_subplot(121) if plot == 2: fdrFdrplot = fig.add_subplot(122) else: f1cdfplot = fig.add_subplot(122) elif plot == 1: histplot = fig.add_subplot(111) # construct histogram leftplt = breaks[:-1] rightplt = breaks[1:] bottomplt = np.zeros(len(leftplt)) topplt = bottomplt + y XYplt = np.array([[leftplt,leftplt,rightplt,rightplt], [bottomplt,topplt,topplt,bottomplt]]).transpose() barpath = path.Path.make_compound_path_from_polys(XYplt) patch = patches.PathPatch(barpath, facecolor='white', edgecolor='#302f2f') histplot.add_patch(patch) histplot.set_xlim(leftplt[0], rightplt[-1]) histplot.set_ylim(-1.5, (topplt.max()+1.5) * 0.1 + topplt.max()) histplot.set_title(main) for k in xrange(K): histplot.plot([x[k], x[k]], [0, yt[k]], color='#e31d76', linewidth = 2) if nulltype == 3: histplot.set_xlabel('delta = ' + str(round(xmax, 3)) + ', sigleft = ' + str(round(sigs[0], 3)) + ', sigright = ' + str(round(sigs[1], 3)) + ', p0 = ' + str(round(fp0.loc['cmest'][2], 3))) if nulltype == 1 or nulltype == 2: histplot.set_xlabel('MLE: delta = ' + str(round(mlests[0], 3)) + ', sigma = ' + str(round(mlests[1], 3)) + ', p0 = ' + str(round(mlests[2], 3)) + '\nCME: delta = ' + str(round(fp0.loc['cmest'][0], 3)) + ', sigma = ' + str(round(fp0.loc['cmest'][1], 3)) + ', p0 = ' + str(round(fp0.loc['cmest'][2], 3))) histplot.set_ylabel('Frequency') histplot.plot(x, f, color='#3bbf53', linewidth = 3) if nulltype == 0: histplot.plot(x, p0theo * f00, linewidth = 3, linestyle = 'dashed', color = 'blue') else: histplot.plot(x, p0 * f0, linewidth = 3, linestyle = 'dashed', color = 'blue') if not pd.isnull(z_2[1]): histplot.plot([z_2[1]], [-0.5], marker = '^', markersize = 16, markeredgecolor = 'red', markeredgewidth = 1.3, color = 'yellow') if not pd.isnull(z_2[0]): histplot.plot([z_2[0]], [-0.5], marker = '^', markersize = 16, markeredgecolor = 'red', markeredgewidth = 1.3, color = 'yellow') if nulltype == 1 or nulltype == 2: Ef = Efdr[0] elif nulltype == 0: Ef = Efdr[3] # construct fdr + Fdr plot if plot == 2 or plot == 4: if nulltype == 0: fdd = fdr0 else: fdd = fdr fdrFdrplot.plot(x, fdd, linewidth = 3, color = 'black') fdrFdrplot.plot(x, Fdrl, linewidth = 3, color = 'red', linestyle = 'dashed') fdrFdrplot.plot(x, Fdrr, linewidth = 3, color = 'green', linestyle = 'dashed') fdrFdrplot.set_ylim(-0.05, 1.1) fdrFdrplot.set_title('fdr (solid); Fdr\'s (dashed)') fdrFdrplot.set_xlabel('Efdr = ' + str(round(Ef, 3))) fdrFdrplot.set_ylabel('fdd (black), Fdrl (red), and Fdrr (green)') fdrFdrplot.plot([0, 0], [0, 1], linestyle = 'dotted', color = 'red') fdrFdrplot.axhline(linestyle = 'dotted', color = 'red') # construct plot of f1 cdf of estimated fdr curve if plot == 3 or plot == 4: if sum([1 for el in cdf1[:, 1] if pd.isnull(el)]) == cdf1.shape[0]: wa.warning('cdf1 is not available.') else: f1cdfplot.plot(cdf1[:, 0], cdf1[:, 1], linewidth = 3, color = 'black') f1cdfplot.set_xlabel('fdr level\nEfdr = ' + str(round(Ef, 3))) f1cdfplot.set_ylabel('f1 proportion < fdr level') f1cdfplot.set_title('f1 cdf of estimated fdr') f1cdfplot.set_ylim(0, 1) f1cdfplot.plot([0.2, 0.2], [0, cdf1[19, 1]], color = 'blue', linestyle = 'dashed') f1cdfplot.plot([0, 0.2], [cdf1[19, 1], cdf1[19, 1]], color = 'blue', linestyle = 'dashed') f1cdfplot.text(0.05, cdf1[19, 1], str(round(cdf1[19, 1], 2))) if saveplot: if savestamp: import time, datetime 
plt.savefig(saveroot + '_' + '-'.join(str(el) for el in list(tuple(datetime.datetime.now().timetuple())[:6])) + '.' + saveext) else: plt.savefig(saveroot + '.' + saveext) if showplot: plt.show() if nulltype == 0: ffdr = rf.approx(x, fdr0, zz, rule = 2, ties = 'ordered') else: ffdr = rf.approx(x, fdr, zz, rule = 2, ties = 'ordered') if mult != None: return {'fdr' : ffdr, 'fp0' : fp0, 'Efdr' : Efdr, 'cdf1' : cdf1, 'mat' : mat, 'z_2' : z_2, 'yt' : yt, 'call' : call, 'x' : x, 'mlest_lo' : mlest_lo, 'mlest_hi' : mlest_hi, 'needsfix' : needsfix, 'nulldens' : nulldens, 'fulldens' : fulldens, 'mult' : EE} return {'fdr' : ffdr, 'fp0' : fp0, 'Efdr' : Efdr, 'cdf1' : cdf1, 'mat' : mat, 'z_2' : z_2, 'yt' : yt, 'call' : call, 'x' : x, 'mlest_lo' : mlest_lo, 'mlest_hi' : mlest_hi, 'needsfix' : needsfix, 'nulldens' : nulldens, 'fulldens' : fulldens}
def _get_segment(cls, user, date):
    if in_transaction():
        warnings.warn('Inside a transaction: may cause performance issues.',
                      RuntimeWarning, stacklevel=3)
    return cls.get_segment(user, date)
def setdata(self, x, y, sigmay=None, xmin=None, xmax=None):
    warnings.warn("Method renamed to setData", DeprecationWarning)
    self.setData(x, y, sigmay, xmin, xmax)
def __init__(self, loader, groups=None, filename=C.DEFAULT_HOST_LIST): if groups is None: groups = dict() self.names = os.listdir(filename) self.names.sort() self.directory = filename self.parsers = [] self.hosts = {} self.groups = groups self._loader = loader for i in self.names: # Skip files that end with certain extensions or characters if any(i.endswith(ext) for ext in C.DEFAULT_INVENTORY_IGNORE): continue # Skip hidden files if i.startswith('.') and not i.startswith('./'): continue # These are things inside of an inventory basedir if i in ("host_vars", "group_vars", "vars_plugins"): continue fullpath = os.path.join(self.directory, i) if os.path.isdir(fullpath): parser = InventoryDirectory(loader=loader, groups=groups, filename=fullpath) else: parser = get_file_parser(fullpath, self.groups, loader) if parser is None: #FIXME: needs to use display import warnings warnings.warning("Could not find parser for %s, skipping" % fullpath) continue self.parsers.append(parser) # retrieve all groups and hosts form the parser and add them to # self, don't look at group lists yet, to avoid # recursion trouble, but just make sure all objects exist in self newgroups = parser.groups.values() for group in newgroups: for host in group.hosts: self._add_host(host) for group in newgroups: self._add_group(group) # now check the objects lists so they contain only objects from # self; membership data in groups is already fine (except all & # ungrouped, see later), but might still reference objects not in self for group in self.groups.values(): # iterate on a copy of the lists, as those lists get changed in # the loop # list with group's child group objects: for child in group.child_groups[:]: if child != self.groups[child.name]: group.child_groups.remove(child) group.child_groups.append(self.groups[child.name]) # list with group's parent group objects: for parent in group.parent_groups[:]: if parent != self.groups[parent.name]: group.parent_groups.remove(parent) group.parent_groups.append(self.groups[parent.name]) # list with group's host objects: for host in group.hosts[:]: if host != self.hosts[host.name]: group.hosts.remove(host) group.hosts.append(self.hosts[host.name]) # also check here that the group that contains host, is # also contained in the host's group list if group not in self.hosts[host.name].groups: self.hosts[host.name].groups.append(group) # extra checks on special groups all and ungrouped # remove hosts from 'ungrouped' if they became member of other groups if 'ungrouped' in self.groups: ungrouped = self.groups['ungrouped'] # loop on a copy of ungrouped hosts, as we want to change that list for host in frozenset(ungrouped.hosts): if len(host.groups) > 1: host.groups.remove(ungrouped) ungrouped.hosts.remove(host) # remove hosts from 'all' if they became member of other groups # all should only contain direct children, not grandchildren # direct children should have dept == 1 if 'all' in self.groups: allgroup = self.groups['all' ] # loop on a copy of all's child groups, as we want to change that list for group in allgroup.child_groups[:]: # groups might once have beeen added to all, and later be added # to another group: we need to remove the link wit all then if len(group.parent_groups) > 1 and allgroup in group.parent_groups: # real children of all have just 1 parent, all # this one has more, so not a direct child of all anymore group.parent_groups.remove(allgroup) allgroup.child_groups.remove(group) elif allgroup not in group.parent_groups: # this group was once added to all, but doesn't 
list it as # a parent any more; the info in the group is the correct # info allgroup.child_groups.remove(group)
def svg_shapes_to_df(svg_source, xpath='//svg:path | //svg:polygon', namespaces=INKSCAPE_NSMAP): ''' Construct a data frame with one row per vertex for all shapes in :data:`svg_source`. Arguments --------- svg_source : str or file-like A file path, URI, or file-like object. xpath : str, optional XPath expression to select shape nodes. By default, all ``svg:path`` and ``svg:polygon`` elements are selected. namespaces : dict, optional Key/value mapping of XML namespaces. Returns ------- pandas.DataFrame Frame with one row per vertex for all shapes in :data:`svg_source`, with the following columns: - ``vertex_i``: The index of the vertex within the corresponding shape. - ``x``: The x-coordinate of the vertex. - ``y``: The y-coordinate of the vertex. - other: attributes of the SVG shape element (e.g., ``id``, ``fill``, etc.) ''' from lxml import etree e_root = etree.parse(svg_source) frames = [] attribs_set = set() # Get list of attributes that are set in any of the shapes (not including # the `svg:path` `"d"` attribute or the `svg:polygon` `"points"` # attribute). # # This, for example, collects attributes such as: # # - `fill`, `stroke` (as part of `"style"` attribute) # - `"transform"`: matrix, scale, etc. for shape_i in e_root.xpath(xpath, namespaces=namespaces): attribs_set.update(shape_i.attrib.keys()) for k in ('d', 'points'): if k in attribs_set: attribs_set.remove(k) attribs = list(sorted(attribs_set)) # List the 'id' attribute first, if it is present. if 'id' in attribs: attribs.remove('id') attribs.insert(0, 'id') for shape_i in e_root.xpath(xpath, namespaces=namespaces): # Gather shape attributes from SVG element. base_fields = [shape_i.attrib.get(k, None) for k in attribs] if shape_i.tag == '{http://www.w3.org/2000/svg}path': # Decode `svg:path` vertices from [`"d"`][1] attribute. # # [1]: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/d points_i = [base_fields + [i] + list(map(float, [m.group(v) for v in 'xy'])) for i, m in enumerate(cre_path_command .finditer(shape_i.attrib['d']))] elif shape_i.tag == '{http://www.w3.org/2000/svg}polygon': # Decode `svg:polygon` vertices from [`"points"`][2] attribute. # # [2]: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/points points_i = [base_fields + [i] + list(map(float, v.split(','))) for i, v in enumerate(shape_i.attrib['points'] .strip().split(' '))] else: warnings.warn('Unsupported shape tag type: %s' % shape_i.tag) continue frames.extend(points_i) if not frames: # There were no shapes found, so set `frames` list to `None` to allow # an empty data frame to be created. frames = None return pd.DataFrame(frames, columns=attribs + ['vertex_i', 'x', 'y'])
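A minimal usage sketch for the function above, assuming it and the module-level helpers it relies on (`pd`, `INKSCAPE_NSMAP`, `cre_path_command`) are importable; the triangle geometry is made up for illustration.

import io

svg_text = ('<svg xmlns="http://www.w3.org/2000/svg">'
            '<polygon id="triangle" points="0,0 10,0 5,8"/>'
            '</svg>')
# One row per vertex; the shape's 'id' attribute is carried into each row.
df = svg_shapes_to_df(io.StringIO(svg_text))
print(df[['id', 'vertex_i', 'x', 'y']])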
def factorize(self, mode='standard', **kwargs): """Factorize terms in the expression. For example: :: A[i]*B[j] + A[i]*C[j] becomes :: A[i]*(B[j] + C[j]). :param mode: multiple factorization strategies are possible, each exposing different, "hidden" opportunities for code motion. * mode == 'standard': factorize symbols along the dimension that appears most often in the expression. * mode == 'all': factorize symbols depending on at least one of the expression's dimensions. * mode == 'domain': factorize symbols depending on the expression's domain. * mode == 'outdomain': factorize symbols independent of the expression's domain. * mode == 'constants': factorize symbols independent of any loops enclosing the expression. """ if mode == 'standard': retval = FindInstances.default_retval() symbols = FindInstances(Symbol).visit(self.stmt.rvalue, ret=retval)[Symbol] # The heuristics privileges domain dimensions dims = self.expr_info.out_domain_dims if not dims or self.expr_info.dimension >= 2: dims = self.expr_info.domain_dims # Get the dimension occurring most often occurrences = [tuple(r for r in s.rank if r in dims) for s in symbols] occurrences = [i for i in occurrences if i] if not occurrences: return self # Finally, establish the factorization dimension dimension = Counter(occurrences).most_common(1)[0][0] should_factorize = lambda n: set(dimension).issubset(set(n.rank)) elif mode in ['all', 'domain', 'outdomain', 'constants']: info = visit(self.expr_info.outermost_loop, info_items=['symbols_dep']) symbols = defaultdict(set) for s, dep in info['symbols_dep'].items(): symbols[s.symbol] |= {l.dim for l in dep} if mode == 'all': should_factorize = lambda n: symbols.get(n.symbol) and \ any(r in self.expr_info.dims for r in symbols[n.symbol]) elif mode == 'domain': should_factorize = lambda n: symbols.get(n.symbol) and \ any(r in self.expr_info.domain_dims for r in symbols[n.symbol]) elif mode == 'outdomain': should_factorize = lambda n: symbols.get(n.symbol) and \ not symbols[n.symbol].issubset(set(self.expr_info.domain_dims)) elif mode == 'constants': should_factorize = lambda n: not symbols.get(n.symbol) else: warning('Unknown factorization strategy. Skipping.') return # Perform the factorization self.expr_factorizer.factorize(should_factorize) return self
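The docstring's `A[i]*B[j] + A[i]*C[j]` example can be reproduced outside of COFFEE; the sketch below uses sympy only to illustrate the algebraic effect of the 'standard' strategy, not the AST rewriting performed above.

import sympy as sp

A_i, B_j, C_j = sp.symbols('A_i B_j C_j')
expr = A_i * B_j + A_i * C_j
# Pull the common factor A_i out of both terms: A_i*(B_j + C_j)
print(sp.factor_terms(expr))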
def fit_model(self, tol=1e-3, iter_max=100, h_step=2.0, epsil_0=10, constant=True, verbose=True): ''' Iteratively fit a segmented ("broken-stick") linear model, moving the break point until the relative change in the residual sum of squares drops below `tol`. ''' # Fit a normal linear model to the data if constant: x_const = sm.add_constant(self.x) model = sm.OLS(self.y, x_const) else: model = sm.OLS(self.y, self.x) init_lm = model.fit() if verbose: print(init_lm.summary()) epsil = epsil_0 # Before we get into the loop, make sure that this was a bad fit if epsil_0 < tol: warnings.warn('Initial epsilon is smaller than tolerance. \ The tolerance should be set smaller.') return init_lm # Sum of residuals dev_0 = np.sum(init_lm.resid**2.) # Count it = 0 h_it = 0 # Now loop through and minimize the residuals by changing where the # breaking point is. while np.abs(epsil) > tol: U = (self.x - self.brk) * (self.x > self.brk) V = deriv_max(self.x, self.brk) X_all = np.vstack([self.x, U, V]).T if constant: X_all = sm.add_constant(X_all) model = sm.OLS(self.y, X_all) fit = model.fit() beta = fit.params[2] # Get coef gamma = fit.params[3] # Get coef # Adjust the break point new_brk = copy(self.brk) new_brk += (h_step * gamma) / beta # If the new break point is outside of the allowed range, reset # the step size to half of the original, then try stepping again if not (self.x > new_brk).any(): while True: h_step /= 2.0 new_brk += (h_step * gamma) / beta h_it += 1 if (self.x > new_brk).any(): self.brk = new_brk break if h_it >= 5: raise ValueError("Cannot find suitable step size. \ Check number of breaks.") else: self.brk = new_brk dev_1 = np.sum(fit.resid**2.) epsil = (dev_1 - dev_0) / (dev_0 + 1e-3) dev_0 = dev_1 if verbose: print("Iteration: %s/%s" % (it+1, iter_max)) print(fit.summary()) print("Break Point: " + str(self.brk)) print("Epsilon: " + str(epsil)) it += 1 if it > iter_max: warnings.warn("Max iterations reached. \ Result may not be minimized.") break # With the break point hopefully found, do a final good fit U = (self.x - self.brk) * (self.x > self.brk) V = deriv_max(self.x, self.brk) X_all = np.vstack([self.x, U, V]).T X_all = sm.add_constant(X_all) model = sm.OLS(self.y, X_all) self.fit = model.fit() self._params = self.fit.params cov_matrix = self.fit.cov_params() self._errs = np.asarray([np.sqrt(cov_matrix[i, i]) for i in range(cov_matrix.shape[0])]) self.brk_err = brk_errs(fit.params, fit.cov_params()) self.get_slopes() return self
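For intuition, here is a standalone sketch of the piecewise design matrix built inside the loop above, evaluated for a single fixed break point on synthetic data (the break at 4.0 and the noise level are arbitrary choices).

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = np.linspace(0., 10., 200)
brk = 4.0
y = np.where(x < brk, 1.0 * x, brk + 0.2 * (x - brk)) + rng.normal(0., 0.1, x.size)

U = (x - brk) * (x > brk)                   # extra slope activated beyond the break
X = sm.add_constant(np.column_stack([x, U]))
fit = sm.OLS(y, X).fit()
print(fit.params)                           # [intercept, slope below brk, change in slope]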
def wcs_from_footprints(dmodels, refmodel=None, transform=None, bounding_box=None, domain=None): """ Create a WCS from a list of input data models. A fiducial point in the output coordinate frame is created from the footprints of all WCS objects. For a spatial frame this is the center of the union of the footprints. For a spectral frame the fiducial is in the beginning of the footprint range. If ``refmodel`` is None, the first WCS object in the list is considered a reference. The output coordinate frame and projection (for celestial frames) is taken from ``refmodel``. If ``transform`` is not supplied, a compound transform is created using CDELTs and PC. If ``bounding_box`` is not supplied, the bounding_box of the new WCS is computed from the bounding_box of all input WCSs. Parameters ---------- dmodels : list of `~jwst.datamodels.DataModel` A list of data models. refmodel : `~jwst.datamodels.DataModel`, optional This model's WCS is used as a reference. The output coordinate frame, the projection and a scaling and rotation transform are created from it. If not supplied the first model in the list is used as ``refmodel``. transform : `~astropy.modeling.core.Model`, optional A transform, passed to :meth:`~gwcs.wcstools.wcs_from_fiducial`. If not supplied Scaling | Rotation is computed from ``refmodel``. bounding_box : tuple, optional Bounding_box of the new WCS. If not supplied it is computed from the bounding_box of all inputs. """ if domain is not None: warnings.warn("'domain' was deprecated in 0.8 and will be removed in the " "next version. Use 'bounding_box' instead.") bb = _domain_to_bounding_box(domain) else: bb = bounding_box wcslist = [im.meta.wcs for im in dmodels] if not isiterable(wcslist): raise ValueError("Expected 'wcslist' to be an iterable of WCS objects.") if not all([isinstance(w, WCS) for w in wcslist]): raise TypeError("All items in wcslist are to be instances of gwcs.WCS.") if refmodel is None: refmodel = dmodels[0] else: if not isinstance(refmodel, DataModel): raise TypeError("Expected refmodel to be an instance of DataModel.") fiducial = compute_fiducial(wcslist, bb) prj = astmodels.Pix2Sky_TAN() if transform is None: transform = [] wcsinfo = pointing.wcsinfo_from_model(refmodel) sky_axes, spec, other = gwutils.get_axes(wcsinfo) rotation = astmodels.AffineTransformation2D(wcsinfo['PC']) transform.append(rotation) if sky_axes: cdelt1, cdelt2 = wcsinfo['CDELT'][sky_axes] scale = np.sqrt(np.abs(cdelt1 * cdelt2)) scales = astmodels.Scale(scale) & astmodels.Scale(scale) transform.append(scales) if transform: transform = functools.reduce(lambda x, y: x | y, transform) out_frame = refmodel.meta.wcs.output_frame wnew = wcs_from_fiducial(fiducial, coordinate_frame=out_frame, projection=prj, transform=transform) footprints = [w.footprint().T for w in wcslist] domain_bounds = np.hstack([wnew.backward_transform(*f) for f in footprints]) for axs in domain_bounds: axs -= axs.min() bounding_box = [] for axis in out_frame.axes_order: axis_min, axis_max = domain_bounds[axis].min(), domain_bounds[axis].max() bounding_box.append((axis_min, axis_max)) bounding_box = tuple(bounding_box) ax1, ax2 = np.array(bounding_box)[sky_axes] offset1 = (ax1[1] - ax1[0]) / 2 offset2 = (ax2[1] - ax2[0]) / 2 offsets = astmodels.Shift(-offset1) & astmodels.Shift(-offset2) wnew.insert_transform('detector', offsets, after=True) wnew.bounding_box = bounding_box return wnew
def warn(self): see_above = self # the next line is what the user will see after the error is printed warnings.warn(see_above, SymPyDeprecationWarning)
def deprecation(message): warnings.warn("<" + message + "> is deprecated!", DeprecationWarning, stacklevel=2)
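Usage sketch for the helper above (the wrapped function name is hypothetical); `DeprecationWarning` is hidden by default outside `__main__` and test runners, so a filter is enabled to make the message visible.

import warnings

def old_name():
    deprecation("old_name")
    return 42

warnings.simplefilter("always", DeprecationWarning)
old_name()   # emits: DeprecationWarning: <old_name> is deprecated!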
def average_precision(y_true, y_pred, integration='trapz', argsort_kind='quicksort'): """Computes the Average Precision (AP) from the recall and precision arrays. Different 'integration' methods can be used. Parameters ---------- y_true: array, shape = [n_samples] True values, interpreted as strictly positive or not (i.e. converted to binary). Could be in {-1, +1} or {0, 1} or {False, True}. y_pred: array, shape = [n_samples] Predicted values. integration: str, optional Type of 'integration' method used to compute the average precision: 'trapz': trapezoidal rule (default) 'voc2010': see http://goo.gl/glxdO and http://goo.gl/ueXzr 'voc2007': see http://goo.gl/E1YyY argsort_kind: str Sorting algorithm. Returns ------- ap: float Average Precision Note ---- 'voc2007' method is here only for legacy purposes. We do not recommend its use since even simple trivial cases like a perfect match between true values and predicted values do not lead to an average precision of 1. """ # -- basic checks and conversion assert len(y_true) == len(y_pred) assert np.isfinite(y_true).all() assert np.isfinite(y_pred).all() assert integration in ['trapz', 'voc2010', 'voc2007'] y_true = np.array(y_true, dtype=DTYPE) assert y_true.ndim == 1 y_pred = np.array(y_pred, dtype=DTYPE) assert y_pred.ndim == 1 n_uniques = np.unique(y_pred) if n_uniques.size == 1: raise ValueError('Rank of predicted values is ill-defined' ' because all elements are equal') elif n_uniques.size < y_pred.size: warnings.warn('some predicted elements have exactly the same value.' ' output will most probably depend on the sorting' ' method used. Here "%s"' % argsort_kind, UserWarning) # -- actual computation rec = recall(y_true, y_pred, argsort_kind=argsort_kind) prec = precision(y_true, y_pred, argsort_kind=argsort_kind) if integration == 'trapz': if rec[0] != 0.: rec = np.concatenate(([0.], rec)) prec = np.concatenate(([prec[0]], prec)) ap = trapz(prec, rec) elif integration == 'voc2010': mrec = np.concatenate(([0.], rec, [1.])) mpre = np.concatenate(([0.], prec, [0.])) rng = np.arange(len(mpre) - 1)[::-1] for i in rng: mpre[i] = max(mpre[i], mpre[i + 1]) sel = np.nonzero(mrec[1:] != mrec[0:-1])[0] + 1 ap = ((mrec[sel] - mrec[sel - 1]) * mpre[sel]).sum() if np.isnan(ap): ap = 0. elif integration == 'voc2007': ap = 0. rng = np.arange(0, 1.1, .1) for th in rng: p = prec[rec >= th] if len(p) > 0: ap += p.max() / rng.size return ap
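A small usage sketch, assuming `average_precision` and the helpers it calls (`recall`, `precision`, `trapz`, `DTYPE`) are importable from the surrounding module; the labels and scores are toy values.

import numpy as np

y_true = np.array([1, 0, 1, 1, 0])
y_pred = np.array([0.9, 0.4, 0.8, 0.35, 0.1])   # all distinct, so no tie warning
ap = average_precision(y_true, y_pred, integration='trapz')
print(ap)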
def licm(self, **kwargs): """Perform generalized loop-invariant code motion.""" if not self._check_loops(self.expr_info.loops): warning("Loop nest unsuitable for generalized licm. Skipping.") return symbols = visit(self.header, info_items=['symbols_dep'])['symbols_dep'] symbols = dict((s, [l.dim for l in dep]) for s, dep in symbols.items()) extracted = True expr_dims_loops = self.expr_info.loops_from_dims expr_outermost_loop = self.expr_info.outermost_loop inv_dep = {} while extracted: extracted = self._extract(self.stmt.rvalue, symbols, **kwargs) for dep, subexprs in extracted.items(): # -1) Remove identical subexpressions subexprs = uniquify(subexprs) # 0) Determine the loop nest level where invariant expressions # should be hoisted. The goal is to hoist them as far as possible # in the loop nest, while minimising temporary storage. # We distinguish six hoisting cases: if len(dep) == 0: # As scalar (/wrap_loop=None/), outside of the loop nest; place = self.header wrap_loop = () next_loop = expr_outermost_loop elif len(dep) == 1 and is_perfect_loop(expr_outermost_loop): # As scalar, outside of the loop nest; place = self.header wrap_loop = (expr_dims_loops[dep[0]],) next_loop = expr_outermost_loop elif len(dep) == 1 and len(expr_dims_loops) > 1: # As scalar, within the loop imposing the dependency place = expr_dims_loops[dep[0]].children[0] wrap_loop = () next_loop = od_find_next(expr_dims_loops, dep[0]) elif len(dep) == 1: # As scalar, right before the expression (which is enclosed # in just a single loop, we can claim at this point) place = expr_dims_loops[dep[0]].children[0] wrap_loop = () next_loop = place.children[place.children.index(self.stmt)] elif set(dep).issuperset(set(self.expr_info.domain_dims)) and \ not any([self.expr_graph.is_written(e) for e in subexprs]): # As n-dimensional vector, where /n == len(dep)/, outside of # the loop nest place = self.header wrap_loop = tuple(expr_dims_loops.values()) next_loop = expr_outermost_loop else: # As vector, within the outermost loop imposing the dependency place = expr_dims_loops[dep[0]].children[0] wrap_loop = tuple(expr_dims_loops[dep[i]] for i in range(1, len(dep))) next_loop = od_find_next(expr_dims_loops, dep[0]) # 1) Create the new invariant temporary symbols loop_size = tuple([l.size for l in wrap_loop]) loop_dim = tuple([l.dim for l in wrap_loop]) inv_syms = [Symbol(self._hoisted_sym % { 'loop_dep': '_'.join(dep).upper() if dep else 'CONST', 'expr_id': self.expr_id, 'round': self.counter, 'i': i }, loop_size) for i in range(len(subexprs))] inv_decls = [Decl(self.expr_info.type, s) for s in inv_syms] inv_syms = [Symbol(s.symbol, loop_dim) for s in inv_syms] # 2) Keep track of new declarations for later easy access for d in inv_decls: d.scope = LOCAL self.decls[d.sym.symbol] = d # 3) Replace invariant subtrees with the proper temporary to_replace = dict(zip(subexprs, inv_syms)) n_replaced = ast_replace(self.stmt.rvalue, to_replace) # 4) Update symbol dependencies for s, e in zip(inv_syms, subexprs): self.expr_graph.add_dependency(s, e) if n_replaced[str(s)] > 1: self.expr_graph.add_dependency(s, s) symbols[s] = dep # 5) Create the body containing invariant statements subexprs = [dcopy(e) for e in subexprs] inv_stmts = [Assign(s, e) for s, e in zip(dcopy(inv_syms), subexprs)] # 6) Track necessary information for AST construction inv_info = (loop_dim, place, next_loop, wrap_loop) if inv_info not in inv_dep: inv_dep[inv_info] = (inv_decls, inv_stmts) else: inv_dep[inv_info][0].extend(inv_decls) 
inv_dep[inv_info][1].extend(inv_stmts) for inv_info, (inv_decls, inv_stmts) in sorted(inv_dep.items()): loop_dim, place, next_loop, wrap_loop = inv_info # Create the hoisted code if wrap_loop: outer_wrap_loop = ast_make_for(inv_stmts, wrap_loop[-1]) for l in reversed(wrap_loop[:-1]): outer_wrap_loop = ast_make_for([outer_wrap_loop], l) code = inv_decls + [outer_wrap_loop] wrap_loop = outer_wrap_loop else: code = inv_decls + inv_stmts wrap_loop = None # Insert the new nodes at the right level in the loop nest ofs = place.children.index(next_loop) place.children[ofs:ofs] = code + [FlatBlock("\n")] # Track hoisted symbols for i, j in zip(inv_stmts, inv_decls): self.hoisted[j.sym.symbol] = (i, j, wrap_loop, place) # Finally, make sure symbols are unique in the AST self.stmt.rvalue = dcopy(self.stmt.rvalue)
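This is not COFFEE's AST machinery, but the transformation `licm` performs can be shown on a tiny Python loop nest: the product `a[i]*b[i]` is invariant in `j`, so it is hoisted out of the inner loop into a temporary.

def before(a, b, c, out):
    for i in range(len(a)):
        for j in range(len(c)):
            out[i][j] += a[i] * b[i] * c[j]

def after(a, b, c, out):
    for i in range(len(a)):
        t = a[i] * b[i]              # hoisted: does not depend on j
        for j in range(len(c)):
            out[i][j] += t * c[j]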
def train_model(args, seed, proxy, pred): trained_model = Training() if args.train: ''' The program is essentially run in one of two mutually exclusive modes (training or test) :param train: if True, begin parsing and training the model file ''' verbose_print(args.verbose, "Training model") if args.datain: warnings.warn("WARNING: The pickle datatype is inherently\ insecure. A quick question: do you trust the\ source of your model? Pickle files can contain\ corrupt code and executable commands.\ They can take over your computer and install\ malicious code on your computer or server. Use\ caution! Your best bet is to train your own\ models and run those! Use --datain at your own\ risk") continue_program = raw_input("Press [Y/y] if you want to continue") if continue_program in ['Y', 'y']: trained_model = existing_training_model(args, seed) else: exit() else: distance = False training_data = False verbose_print(args.verbose, "Reading training set") (user, experimental, chemofeatures, fingerprint) = check_features(args) if (args.distance is True) or (args.cluster is True) or (args.impute is True): '''These functions all require a distance matrix, which is best collected using the fingerprint data''' fingerprint = True training = rt.Read(args.input, pred, user=user, id_name=_id, weights=args.weight) '''This block of code generally works on feature collection and parsing, including the removal of fully redundant features. The difference between remove_static=True and False is whether or not to get rid of fully redundant features. Since the distance matrix is the same, regardless, it is run using original data''' training_data = add_pubchem_features(training, args, user=user, proxy=proxy, fingerprint=fingerprint, experimental=experimental, chemofeatures=chemofeatures, id_name=_id, chunks=_chunks) if (args.cluster is True) or (args.distance is True) or (args.impute is True): verbose_print(args.verbose, "Creating distance matrix") '''Collect distance matrix using the original dataset''' distance = collect_distance_matrix(training_data) '''Extract features from the user and PubChem data''' verbose_print(args.verbose, "Extracting features") training_data = extract_features(training_data, args, user=user, fingerprint=fingerprint, experimental=experimental, chemofeatures=chemofeatures, remove_static=True) '''Discretize the y-values for the classification process. If no split value is provided then the default for the program is to break the value at the median ''' if training_data.compound: train = bt.Process(training_data, split_value=args.split_value, verbose=args.verbose) if args.impute is True: train.impute_values(distance=distance, verbose=args.verbose) if args.selection is True: train.feature_selection(verbose=args.verbose, seed=args.random) '''If dataout parameter is set, it prints to pickle a file containing the features that were extracted. In later runs this can be specified as the data input using the datain parameter ''' if args.dataout: features_file = args.dataout + ".features" with open(features_file, 'wb') as fid: pickle.dump(train, fid) '''This is where the model is actually trained in the tm module''' model = tm.Train(train) model.train_model() trained_model.model = model '''If dataout parameter is set, it prints to pickle a file containing the RF model.
In later runs this can be specified as the data input using the datain parameter ''' if args.dataout: model_file = args.dataout + ".model" with open(model_file, 'wb') as fid: pickle.dump(model, fid) if args.cv: report_model_validation(model, args) if args.cluster: cluster = cl.Clustering(training_data.compound, seed=args.random) cluster.cluster_training(model) trained_model.cluster = cluster if args.dataout: cluster_file = args.dataout + ".cluster" with open(cluster_file, 'wb') as fid: pickle.dump(cluster, fid) else: trained_model = False return trained_model
def _generate_cpu_code(self, kernel, **kwargs): """Generate kernel code according to the various optimization options.""" rewrite = kwargs.get('rewrite') vectorize = kwargs.get('vectorize') v_type, v_param = vectorize if vectorize else (None, None) align_pad = kwargs.get('align_pad') split = kwargs.get('split') toblas = kwargs.get('blas') unroll = kwargs.get('unroll') precompute = kwargs.get('precompute') dead_ops_elimination = kwargs.get('dead_ops_elimination') info = visit(kernel) decls = info['decls'] # Structure up expressions and related metadata nests = defaultdict(OrderedDict) for stmt, expr_info in info['exprs'].items(): parent, nest, domain = expr_info if not nest: continue metaexpr = MetaExpr(check_type(stmt, decls), parent, nest, domain) nests[nest[0]].update({stmt: metaexpr}) loop_opts = [CPULoopOptimizer(loop, header, decls, exprs) for (loop, header), exprs in nests.items()] # Combining certain optimizations is meaningless/forbidden. if unroll and toblas: raise RuntimeError("BLAS forbidden with unrolling") if dead_ops_elimination and split: raise RuntimeError("Split forbidden with zero-valued blocks avoidance") if dead_ops_elimination and toblas: raise RuntimeError("BLAS forbidden with zero-valued blocks avoidance") if dead_ops_elimination and v_type and v_type != VectStrategy.AUTO: raise RuntimeError("SIMDization forbidden with zero-valued blocks avoidance") if unroll and v_type and v_type != VectStrategy.AUTO: raise RuntimeError("SIMDization forbidden with unrolling") if rewrite == 'auto' and len(info['exprs']) > 1: warning("Rewrite mode=auto forbidden with multiple expressions") warning("Switching to rewrite mode=2") rewrite = 2 ### Optimization pipeline ### for loop_opt in loop_opts: # 0) Expression Rewriting if rewrite: loop_opt.rewrite(rewrite) # 1) Dead-operations elimination if dead_ops_elimination: loop_opt.eliminate_zeros() # 2) Splitting if split: loop_opt.split(split) # 3) Precomputation if precompute: loop_opt.precompute(precompute) # 4) Unroll/Unroll-and-jam if unroll: loop_opt.unroll(dict(unroll)) # 5) Vectorization if initialized and flatten(loop_opt.expr_domain_loops): vect = LoopVectorizer(loop_opt) if align_pad and not toblas: # Padding and data alignment vect.pad_and_align() if v_type and v_type != VectStrategy.AUTO: if isa['inst_set'] == 'SSE': raise RuntimeError("SSE vectorization not supported") # Specialize vectorization for the memory access pattern # of the expression vect.specialize(v_type, v_param) # 6) Conversion into blas calls if toblas: self.blas = loop_opt.blas(toblas) # Ensure kernel is always marked static inline # Remove either or both of static and inline (so that we get the order right) kernel.pred = [q for q in kernel.pred if q not in ['static', 'inline']] kernel.pred.insert(0, 'inline') kernel.pred.insert(0, 'static') return loop_opts
def ff_timeresolved(x, win=None, start=None, stop=None, step=None): """ Evaluates the empirical Fano Factor (FF) of the spike counts of a list of spike trains. By default computes the FF over the full time span of the data. However, it can compute the FF time-resolved as well. Given the vector v containing the observed spike counts (one per spike train) in the time window [t0, t1], the FF in [t0, t1] is: FF := var(v)/mean(v). The FF is usually computed for spike trains representing the activity of the same neuron over different trials. The higher the FF, the larger the cross-trial non-stationarity. For a time-stationary Poisson process, the theoretical FF is 1. Parameters ---------- x : list of SpikeTrain a list of spike trains for which to compute the FF of spike counts. win : Quantity or None (optional) Length of each time window over which to compute the FF. If None, the FF is computed over the largest window possible; otherwise, the window slides along time (see parameter step). Default: None start : Quantity or None (optional) starting time for the computation of the FF. If None, the largest t_start among those of the spike trains in x is used. Default: None stop : Quantity or None (optional) ending time for the computation of the FF. If None, the smallest t_stop among those of the spike trains in x is used. Default: None step : Quantity or None (optional) time shift between two consecutive sliding windows. If None, successive windows are adjacent. Default: None Returns ------- values: array array of FF values computed over consecutive time windows windows: array of shape (..., 2) array of time windows over which the FF has been computed """ # Compute max(t_start) and min(t_stop) and check consistency max_tstart = max([t.t_start for t in x]) min_tstop = min([t.t_stop for t in x]) if not (all([max_tstart == t.t_start for t in x]) and all([min_tstop == t.t_stop for t in x])): warnings.warn('spike trains have different t_start or t_stop' ' values. FF computed for inner values only') # Set start, stop, window length and step for the default cases t_start = max_tstart if start is None else start t_stop = min_tstop if stop is None else stop wlen = t_stop - t_start if win is None else win wstep = wlen if step is None else step # Convert all time quantities in dimensionless (_dl) units (meant in s) start_dl = float(t_start.simplified.base) stop_dl = float(t_stop.simplified.base) wlen_dl = float(wlen.simplified.base) step_dl = float(wstep.simplified.base) # Define the centers of the sliding windows where the FF must be computed ff_times = numpy.arange(wlen_dl / 2. + start_dl, stop_dl - wlen_dl / 2. + step_dl / 2, step_dl) # Define the windows within which the FF must be computed (as Nx2 array) windows = pq.s * numpy.array([numpy.max([ff_times - wlen_dl / 2., start_dl * numpy.ones( len(ff_times))], axis=0), numpy.min([ff_times + wlen_dl / 2., stop_dl * numpy.ones( len(ff_times))], axis=0)]).T windows = windows.rescale(x[0].units) # Compute the FF in each window defined above ff_values = numpy.zeros(len(ff_times)) for i, w in enumerate(windows): x_sliced = [t.time_slice(w[0], w[1]) for t in x] ff_values[i] = fanofactor(x_sliced) return ff_values, windows
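Usage sketch with synthetic data, assuming `neo`, `quantities` (`pq`) and the module's `fanofactor` helper are available; the trial count, rate and window sizes are arbitrary.

import numpy as np
import quantities as pq
import neo

trains = [neo.SpikeTrain(np.sort(np.random.uniform(0., 10., 50)) * pq.s,
                         t_start=0. * pq.s, t_stop=10. * pq.s)
          for _ in range(20)]
values, windows = ff_timeresolved(trains, win=2 * pq.s, step=1 * pq.s)
print(values.shape, windows.shape)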