class CoordSet(HasTraits):
    # A named, validated collection of coordinate objects (Coord, LinearCoord
    # or nested CoordSet).  Items can be reached by position, by dimension
    # name, by title or as attributes (see ``__getitem__``/``__getattr__``).

    # Hidden attributes containing the collection of objects
    _coords = List(allow_none=True)
    _references = Dict({})
    _updated = Bool(False)

    # Hidden id and name of the object
    _id = Unicode()
    _name = Unicode()

    # Hidden attribute to specify if the collection is for a single dimension
    _is_same_dim = Bool(False)

    # other settings
    _copy = Bool(False)
    _sorted = Bool(True)
    _html_output = Bool(False)

    # default coord index
    _default = Int(0)

    # ------------------------------------------------------------------
    # initialization
    # ------------------------------------------------------------------
    def __init__(self, *coords, **kwargs):
        """
        A collection of Coord objects for a NDArray object with validation.

        This object is an iterable containing a collection of Coord objects.

        Parameters
        ----------
        *coords : |NDarray|, |NDArray| subclass or |CoordSet| sequence of objects.
            If an instance of CoordSet is found, instead of an array, this means
            that all coordinates in this coords describe the same axis.
            It is assumed that the coordinates are passed in the order of the
            dimensions of a nD numpy array (
            `row-major <https://docs.scipy.org/doc/numpy-1.14.1/glossary.html#term-row-major>`_
            order), i.e., for a 3d object : 'z', 'y', 'x'.
        **kwargs: dict
            See other parameters.

        Other Parameters
        ----------------
        x : |NDarray|, |NDArray| subclass or |CoordSet|
            A single coordinate associated to the 'x'-dimension.
            If a coord was already passed in the argument, this will overwrite
            the previous. It is thus not recommended to simultaneously use
            both way to initialize the coordinates to avoid such conflicts.
        y, z, u, ... : |NDarray|, |NDArray| subclass or |CoordSet|
            Same as `x` for the others dimensions.
        dims : list of string, optional
            Names of the dims to use corresponding to the coordinates. If not
            given, standard names are used: x, y, ...
            The sequence passed by the caller is not modified.
        copy : bool, optional, default=True
            Stored in the ``_copy`` attribute.
        sorted : bool, optional, default=True
            Whether the coordinates are sorted by name during validation.
        keepnames : bool, optional, default=False
            If True, positional coordinates keep the name they already have
            instead of receiving one from `dims` or from the available names.
        name : str, optional
            Name of the coordset itself.

        Raises
        ------
        ValueError
            If the positional inputs cannot be interpreted, or a keyword value
            has an unsupported type.
        KeyError
            If a keyword is not an acceptable coordinate name.

        See Also
        --------
        Coord : Explicit coordinates object.
        LinearCoord : Implicit coordinates object.
        NDDataset: The main object of SpectroChempy which makes use of CoordSet.

        Examples
        --------
        >>> from spectrochempy import Coord, CoordSet

        Define 4 coordinates, with two for the same dimension

        >>> coord0 = Coord.linspace(10., 100., 5, units='m', title='distance')
        >>> coord1 = Coord.linspace(20., 25., 4, units='K', title='temperature')
        >>> coord1b = Coord.linspace(1., 10., 4, units='millitesla', title='magnetic field')
        >>> coord2 = Coord.linspace(0., 1000., 6, units='hour', title='elapsed time')

        Now create a coordset

        >>> cs = CoordSet(t=coord0, u=coord2, v=[coord1, coord1b])

        Display some coordinates

        >>> cs.u
        Coord: [float64] hr (size: 6)

        >>> cs.v
        CoordSet: [_1:temperature, _2:magnetic field]

        >>> cs.v_1
        Coord: [float64] K (size: 4)
        """
        self._copy = kwargs.pop('copy', True)
        self._sorted = kwargs.pop('sorted', True)

        keepnames = kwargs.pop('keepnames', False)
        # if keepnames is false and the names of the dimensions are not passed
        # in kwargs, then use dims if not none
        dims = kwargs.pop('dims', None)
        if dims is not None:
            # work on a private copy: names are popped from it below and the
            # caller's sequence must not be emptied as a side effect
            dims = list(dims)

        self.name = kwargs.pop('name', None)

        # initialise the coordinate list
        self._coords = []

        # First evaluate passed args
        # --------------------------

        # some cleaning
        if coords:
            if all(isinstance(item, (np.ndarray, NDArray, list, CoordSet))
                   or item is None for item in coords):
                # Any instance of a NDArray can be accepted as coordinates for
                # a dimension. If an instance of CoordSet is found, this means
                # that all coordinates in this set describe the same axis
                coords = tuple(coords)

            elif is_sequence(coords) and len(coords) == 1:
                coords = coords[0]
                if isinstance(coords, dict):
                    # we have passed a dict, postpone to the kwargs evaluation
                    # process
                    kwargs.update(coords)
                    coords = None

            else:
                raise ValueError('Did not understand the inputs')

        # now store the args coordinates in self._coords (validation is fired
        # when this attribute is set)
        if coords:
            # we fill from the end of the list (in reverse order) because by
            # convention when the names are not specified, the order of the
            # coords follow the order of dims.
            for coord in coords[::-1]:
                if not isinstance(coord, CoordSet):
                    if isinstance(coord, list):
                        coord = CoordSet(*coord, sorted=False)
                    elif not isinstance(coord, LinearCoord):
                        coord = Coord(coord, copy=True)
                else:
                    coord = cpy.deepcopy(coord)

                if not keepnames:
                    if dims is None:
                        # take the last available name of available names list
                        coord.name = self.available_names.pop(-1)
                    else:
                        # use the provided list of dims
                        coord.name = dims.pop(-1)

                # append the coord (but instead of append, use assignation
                # - in _append - to fire the validation process)
                self._append(coord)

        # now evaluate keywords argument
        # ------------------------------
        for key, coord in list(kwargs.items()):
            # remove the already used kwargs (Fix: deprecation warning in
            # Traitlets - all args, kwargs must be used)
            del kwargs[key]

            # prepare values to be either Coord, LinearCoord or CoordSet
            if isinstance(coord, (list, tuple)):
                # make sure in this case it becomes a CoordSet instance
                coord = CoordSet(*coord, sorted=False)

            elif isinstance(coord, np.ndarray) or coord is None:
                # make sure it's a Coord (even if it is None -> Coord(None))
                coord = Coord(coord, copy=True)

            elif isinstance(coord, str) and coord in DEFAULT_DIM_NAME:
                # may be a reference to another coordinates (e.g. same
                # coordinates for various dimensions)
                self._references[key] = coord  # store this reference
                continue

            # Populate the coords with coord and coord's name.
            if not isinstance(coord, (NDArray, Coord, LinearCoord, CoordSet)):
                raise ValueError(
                    f'Probably an invalid type of coordinates has been passed: {key}:{coord} '
                )

            if key in self.available_names or (
                    len(key) == 2 and key.startswith('_')
                    and key[1] in "123456789"):
                # ok we can find it as a canonical name:
                # this will overwrite any already defined coord value
                # which means also that kwargs have priority over args
                coord.name = key
                self._append(coord)

            elif not self.is_empty and key in self.names:
                # a coordinate with this name is already set in passed arg:
                # replace it
                idx = self.names.index(key)
                coord.name = key
                self._coords[idx] = coord

            else:
                raise KeyError(
                    f'Probably an invalid key (`{key}`) for coordinates has been passed. '
                    f'Valid keys are among:{DEFAULT_DIM_NAME}')

        # inform the parent about the update
        self._updated = True

        # set a notifier on the name traits name of each coordinates.
        # `_coords` is None when nothing was stored (the validator turns an
        # empty list into None), hence the `or ()`.
        for coord in self._coords or ():
            if coord is not None:
                HasTraits.observe(coord, self._coords_update, '_name')

        # initialize the base class with the eventual remaining arguments
        super().__init__(**kwargs)

    def implements(self, name=None):
        """
        Utility to check if the current object implement `CoordSet`.

        Rather than isinstance(obj, CoordSet) use object.implements('CoordSet').
        This is useful to check type without importing the module

        Parameters
        ----------
        name : str, optional
            Class name to compare with.

        Returns
        -------
        str or bool
            ``'CoordSet'`` when `name` is None, else whether `name` equals
            ``'CoordSet'``.
        """
        if name is None:
            return 'CoordSet'
        return name == 'CoordSet'

    # ------------------------------------------------------------------
    # Validation methods
    # ------------------------------------------------------------------
    @validate('_coords')
    def _coords_validate(self, proposal):
        # Validate (and optionally sort by name) the list assigned to
        # `_coords`.  Returns None for an empty proposal.
        coords = proposal['value']
        if not coords:
            return None

        for coord in coords:
            if coord and not isinstance(coord, (Coord, LinearCoord, CoordSet)):
                raise TypeError(
                    'At this point all passed coordinates should be of type Coord or CoordSet!'
                )

        for coord in coords:
            if isinstance(coord, CoordSet):
                # it must be a single dimension axis
                # in this case we must have same length for all coordinates
                coord._is_same_dim = True

                # check this is valid in term of size: reading `sizes` raises
                # ValueError when the sub-coordinates differ in size
                _ = coord.sizes

                # change the internal names: we must have _1 for the first
                # coordinates, _2 the second, etc...
                coord._set_names([f"_{i + 1}" for i in range(len(coord))])
                coord._set_parent_dim(coord.name)

        # last check and sorting
        for coord in coords:
            if not coord.has_defined_name:
                raise ValueError(
                    'At this point all passed coordinates should have a valid name!'
                )

        if self._sorted:
            # sort on the name only, so that coordinates themselves are never
            # compared when two names are equal
            coords = sorted(coords, key=lambda item: item.name)

        return list(coords)  # be sure its a list not a tuple

    @default('_id')
    def _id_default(self):
        # a unique id
        return f"{type(self).__name__}_{str(uuid.uuid1()).split('-')[0]}"

    # ------------------------------------------------------------------
    # Readonly Properties
    # ------------------------------------------------------------------
    @property
    def available_names(self):
        """
        Chars that can be used for dimension name (DEFAULT_DIM_NAMES less those already in use)
        """
        _available_names = DEFAULT_DIM_NAME.copy()
        for item in self.names:
            if item in _available_names:
                _available_names.remove(item)
        return _available_names

    @property
    def coords(self):
        """
        list - Coordinates in the coordset
        """
        return self._coords

    @property
    def has_defined_name(self):
        """
        bool - True is the name has been defined
        (i.e. `name` differs from the automatically generated `id`).
        """
        return self.name != self.id

    @property
    def id(self):
        """
        str - Object identifier (Readonly property).
        """
        return self._id

    @property
    def is_empty(self):
        """
        bool - True if there is no coords defined.
        """
        # `_coords` is either None or a list: both "no coordinates" cases are
        # falsy
        return not self._coords

    @property
    def is_same_dim(self):
        """
        bool - True if the coords define a single dimension
        """
        return self._is_same_dim

    @property
    def references(self):
        """
        dict - Mapping of names to the dimension name they refer to.
        """
        return self._references

    @property
    def sizes(self):
        """int or tuple of int - Sizes of the coord object for each dimension
        (readonly property). If the set is for a single dimension return a
        single size as all coordinates must have the same.

        Raises
        ------
        ValueError
            If the set is for a single dimension and the sizes differ.
        """
        # recurrence if item is a CoordSet
        _sizes = [item.size for item in self._coords]

        if self.is_same_dim:
            _sizes = list(set(_sizes))
            if len(_sizes) > 1:
                raise ValueError(
                    'Coordinates must be of the same size for a dimension with multiple coordinates'
                )
            return _sizes[0]
        return _sizes

    # alias
    size = sizes

    # TODO: provide iteritems, items etc ... to simulate a dict

    @property
    def names(self):
        """list - Names of the coords in the current coords (read only property)
        """
        return [item.name for item in self._coords or ()
                if item.has_defined_name]

    # ------------------------------------------------------------------
    # Mutable Properties
    # ------------------------------------------------------------------
    @property
    def default(self):
        """
        Coord - default coordinates
        """
        return self[self._default]

    @property
    def data(self):
        # in case data is called on a coordset for dimension with multiple
        # coordinates return the first coordinates
        return self.default.data

    @property
    def name(self):
        """
        str - Name of the coordset; falls back to `id` when no name is set.
        """
        return self._name or self._id

    @name.setter
    def name(self, value):
        if value is not None:
            self._name = value

    @property
    def titles(self):
        """list - Titles of the coords in the current coords

        A coordinate without title contributes its name; a nested CoordSet
        contributes a list.
        """
        _titles = []
        for item in self._coords:
            if isinstance(item, NDArray):
                _titles.append(item.title or item.name)  # TODO:name
            elif isinstance(item, CoordSet):
                _titles.append([el.title or el.name for el in item])  # TODO:name
            else:
                raise ValueError('Something wrong with the titles!')
        return _titles

    @property
    def labels(self):
        """list - Labels of the coordinates in the current coordset
        """
        return [item.labels for item in self]

    @property
    def units(self):
        """
        list - Units of the coords in the current coords
        """
        return [item.units for item in self]

    # ------------------------------------------------------------------
    # public methods
    # ------------------------------------------------------------------
    def copy(self, keepname=False):
        """
        Make a disconnected copy of the current coords.

        Parameters
        ----------
        keepname : bool, optional
            Accepted for interface compatibility; not used here.

        Returns
        -------
        object
            an exact copy of the current object
        """
        return self.__copy__()

    def keys(self):
        """
        Alias for names

        Returns
        -------
        out : list
            list of all coordinates names (including reference to other coordinates)
        """
        keys = list(self.names)
        keys.extend(self.references.keys())
        return keys

    def select(self, val):
        """
        Select the default coord index

        Parameters
        ----------
        val : int
            1-based position of the coordinate to use as default. Values
            outside the valid range are clamped to the first/last coordinate.
        """
        last = max(len(self.names) - 1, 0)
        self._default = min(max(0, int(val) - 1), last)

    def set(self, *args, **kwargs):
        """
        Set one or more coordinates in the current CoordSet

        Parameters
        ----------
        *args
            Coordinates, or a single sequence/CoordSet of coordinates. When
            given, the current content is reset and the coordinates are named
            from the available names, last positional argument first.
        **kwargs
            Coordinates assigned by name (``self[name] = coord``).
        """
        if not args and not kwargs:
            return

        if len(args) == 1 and (is_sequence(args[0])
                               or isinstance(args[0], CoordSet)):
            args = args[0]

        if isinstance(args, CoordSet):
            kwargs.update(args.to_dict())
            args = ()

        if args:
            self._coords = []  # reset

        for item in args[::-1]:
            item.name = self.available_names.pop()
            self._append(item)

        for k, item in kwargs.items():
            if isinstance(item, CoordSet):
                # try to keep this parameter to True!
                item._is_same_dim = True
            self[k] = item

    def set_titles(self, *args, **kwargs):
        """
        Set one or more coord title at once

        Notes
        -----
        If the args are not named, then the attributions are made in
        coordinate's name alphabetical order : e.g, the first title will be
        for the `x` coordinates, the second for the `y`, etc.

        Parameters
        ----------
        args : str(s)
            The list of titles to apply to the set of coordinates (they must
            be given according to the coordinate's name alphabetical order
        kwargs : str
            keyword attribution of the titles. The keys must be valid names
            among the coordinate's name list. This is the recommended way to
            set titles as this will be less prone to errors.
        """
        if len(args) == 1 and (is_sequence(args[0])
                               or isinstance(args[0], CoordSet)):
            args = args[0]

        for i, item in enumerate(args):
            if not isinstance(self[i], CoordSet):
                self[i].title = item
            elif is_sequence(item):
                # one title per sub-coordinate
                for j, v in enumerate(self[i]):
                    v.title = item[j]

        for k, item in kwargs.items():
            self[k].title = item

    def set_units(self, *args, **kwargs):
        """
        Set one or more coord units at once.

        Notes
        -----
        If the args are not named, then the attributions are made in
        coordinate's name alphabetical order : e.g, the first units will be
        for the `x` coordinates, the second for the `y`, etc.

        Parameters
        ----------
        args : str(s)
            The list of units to apply to the set of coordinates (they must
            be given according to the coordinate's name alphabetical order
        kwargs : str
            keyword attribution of the units. The keys must be valid names
            among the coordinate's name list. This is the recommended way to
            set units as this will be less prone to errors.
        force : bool, optional, default=False
            whether or not the new units must be compatible with the current
            units. See the `Coord`.`to` method.
        """
        force = kwargs.pop('force', False)

        if len(args) == 1 and is_sequence(args[0]):
            args = args[0]

        for i, item in enumerate(args):
            if not isinstance(self[i], CoordSet):
                self[i].to(item, force=force, inplace=True)
            elif is_sequence(item):
                # one unit per sub-coordinate
                for j, v in enumerate(self[i]):
                    v.to(item[j], force=force, inplace=True)

        for k, item in kwargs.items():
            self[k].to(item, force=force, inplace=True)

    def to_dict(self):
        """
        Return a dict of the coordinates from the coordset

        Returns
        -------
        out : dict
            A dictionary where keys are the names of the coordinates, and the
            values the coordinates themselves
        """
        return dict(zip(self.names, self._coords))

    def update(self, **kwargs):
        """
        Update a specific coordinates in the CoordSet.

        Parameters
        ----------
        kwarg : Only keywords among the CoordSet.names are allowed - they
            denotes the name of a dimension. Other keywords are ignored.
        """
        for dim in list(kwargs):
            if dim in self.names:
                # we can replace the given coordinates
                idx = self.names.index(dim)
                self[idx] = Coord(kwargs.pop(dim), name=dim)

    # ------------------------------------------------------------------
    # private methods
    # ------------------------------------------------------------------
    def _append(self, coord):
        # utility function to append coordinate with full validation
        if not isinstance(coord, tuple):
            coord = (coord,)
        if self._coords:
            # some coordinates already present, prepend the new one;
            # assigning (instead of appending) fires the validation process
            self._coords = (*coord,) + tuple(self._coords)
        else:
            # no coordinates yet, start a new tuple of coordinate
            self._coords = (*coord,)

    def _loc2index(self, loc):
        # Return the index of a location: first coordinate that resolves it
        for coord in self.coords:
            try:
                return coord._loc2index(loc)
            except IndexError:
                continue
        # not found!
        raise IndexError

    def _set_names(self, names):
        # utility function to change names of coordinates (in batch)
        # useful when a coordinate is a CoordSet itself
        for coord, name in zip(self._coords, names):
            coord.name = name

    def _set_parent_dim(self, name):
        # utility function to set the parent name for sub coordset
        for coord in self._coords:
            coord._parent_dim = name

    # ------------------------------------------------------------------
    # special methods
    # ------------------------------------------------------------------
    @staticmethod
    def __dir__():
        return ['coords', 'references', 'is_same_dim', 'name']

    def __call__(self, *args, **kwargs):
        # allow the following syntax: coords(), coords(0,2) or
        # coords(axis=...).  A single selected coordinate is returned as is,
        # several are wrapped in a new CoordSet.
        coords = []
        axis = kwargs.get('axis', None)
        if args:
            for idx in args:
                coords.append(self[idx])
        elif axis is not None:
            if not is_sequence(axis):
                axis = [axis]
            for i in axis:
                coords.append(self[i])
        else:
            coords = self._coords
        if len(coords) == 1:
            return coords[0]
        return CoordSet(*coords)

    def __hash__(self):
        # hash derived from the contained coordinates
        return hash(tuple(self._coords))

    def __len__(self):
        # `_coords` is None when the set is empty
        return len(self._coords or ())

    def __delattr__(self, item):
        if 'notify_change' in item:
            pass
        else:
            try:
                return self.__delitem__(item)
            except (IndexError, KeyError):
                raise AttributeError

    def __getattr__(self, item):
        # when the attribute was not found, try it as a coordinate key
        if '_validate' in item or '_changed' in item:
            raise AttributeError

        try:
            return self.__getitem__(item)
        except (IndexError, KeyError):
            raise AttributeError

    def __getitem__(self, index):
        # String keys are resolved, in order, against: names, references,
        # titles, sub-coordset titles, sub-coordset names, and finally the
        # `<dim>_<suffix>` form.  Other keys index the underlying list; a
        # slice returns a new CoordSet.
        if isinstance(index, str):

            # find by name
            if index in self.names:
                idx = self.names.index(index)
                return self._coords[idx]

            # ok we did not find it!
            # let's try in references
            if index in self._references.keys():
                return self._references[index]

            # let's try in the title
            if index in self.titles:
                # selection by coord titles
                if self.titles.count(index) > 1:
                    warnings.warn(
                        f"Getting a coordinate from its title. However `{index}` occurs several time. Only"
                        f" the first occurence is returned!")
                return self._coords[self.titles.index(index)]

            # may be it is a title or a name in a sub-coords
            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.titles:
                    # selection by subcoord title
                    return item.__getitem__(item.titles.index(index))

            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.names:
                    # selection by subcoord name
                    return item.__getitem__(item.names.index(index))

            try:
                # let try with the canonical dimension names
                if index[0] in self.names:
                    # ok we can find it a a canonical name:
                    c = self._coords[self.names.index(index[0])]
                    if len(index) > 1 and index[1] == '_':
                        if isinstance(c, CoordSet):
                            c = c.__getitem__(index[1:])
                        else:
                            c = c.__getitem__(index[2:])  # try on labels
                        return c
            except IndexError:
                pass

            raise KeyError(
                f"Could not find `{index}` in coordinates names or titles")

        res = self._coords[index]
        if isinstance(index, slice):
            if isinstance(res, CoordSet):
                res = (res,)
            return CoordSet(*res, keepnames=True)
        return res

    def __setattr__(self, key, value):
        # Known internal attributes go straight to HasTraits; anything else
        # is first tried as a coordinate assignment, then as a plain attribute.
        keyb = key[1:] if key.startswith('_') else key
        if keyb in [
                'parent', 'copy', 'sorted', 'coords', 'updated', 'name',
                'html_output', 'is_same_dim', 'parent_dim', 'trait_values',
                'trait_notifiers', 'trait_validators',
                'cross_validation_lock', 'notify_change'
        ]:
            super().__setattr__(key, value)
            return

        try:
            self.__setitem__(key, value)
        except Exception:
            # deliberate best effort: not a coordinate, store as attribute
            super().__setattr__(key, value)

    def __setitem__(self, index, coord):
        # Store a copy of `coord` under `index`.  String keys are resolved
        # like in __getitem__ (names, titles, sub-coordsets, `<dim>_<suffix>`);
        # an unused valid name appends a new coordinate.
        try:
            coord = coord.copy(keepname=True)  # to avoid modifying the original
        except TypeError as e:
            if isinstance(coord, list):
                coord = [c.copy(keepname=True) for c in coord[:]]
            else:
                raise e

        if isinstance(index, str):

            # find by name
            if index in self.names:
                idx = self.names.index(index)
                coord.name = index
                self._coords.__setitem__(idx, coord)
                return

            # ok we did not find it!
            # let's try in the title
            if index in self.titles:
                # selection by coord titles
                if self.titles.count(index) > 1:
                    warnings.warn(
                        f"Getting a coordinate from its title. However `{index}` occurs several time. Only"
                        f" the first occurence is returned!")
                index = self.titles.index(index)
                coord.name = self.names[index]
                self._coords.__setitem__(index, coord)
                return

            # may be it is a title or a name in a sub-coords
            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.titles:
                    # selection by subcoord title
                    index = item.titles.index(index)
                    coord.name = item.names[index]
                    item.__setitem__(index, coord)
                    return
            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.names:
                    # selection by subcoord name
                    index = item.names.index(index)
                    coord.name = item.names[index]
                    item.__setitem__(index, coord)
                    return

            try:
                # let try with the canonical dimension names
                if index[0] in self.names:
                    # ok we can find it a a canonical name:
                    c = self._coords.__getitem__(self.names.index(index[0]))
                    if len(index) > 1 and index[1] == '_':
                        c.__setitem__(index[1:], coord)
                        return
            except KeyError:
                pass

            # add the new coordinates
            if index in self.available_names or (
                    len(index) == 2 and index.startswith('_')
                    and index[1] in list("123456789")):
                coord.name = index
                self._coords.append(coord)
                return

            else:
                raise KeyError(
                    f"Could not find `{index}` in coordinates names or titles")

        self._coords[index] = coord

    def __delitem__(self, index):
        # Only string keys are handled here (names, titles, sub-coordset
        # titles, `<dim>_<suffix>`).
        if isinstance(index, str):

            # find by name
            if index in self.names:
                idx = self.names.index(index)
                del self._coords[idx]
                return

            # let's try in the title
            if index in self.titles:
                # selection by coord titles
                index = self.titles.index(index)
                self._coords.__delitem__(index)
                return

            # may be it is a title in a sub-coords
            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.titles:
                    # selection by subcoord title
                    return item.__delitem__(index)

            # let try with the canonical dimension names
            if index[0] in self.names:
                # ok we can find it a a canonical name:
                c = self._coords.__getitem__(self.names.index(index[0]))
                if len(index) > 1 and index[1] == '_':
                    if isinstance(c, CoordSet):
                        return c.__delitem__(index[1:])

            raise KeyError(
                f"Could not find `{index}` in coordinates names or titles")

    def __repr__(self):
        out = "CoordSet: [" + ', '.join(['{}'] * len(self._coords)) + "]"
        s = []
        for item in self._coords:
            if isinstance(item, CoordSet):
                # nested set: reuse its repr without the leading class tag
                s.append(f"{item.name}:" +
                         repr(item).replace('CoordSet: ', ''))
            else:
                s.append(f"{item.name}:{item.title}")
        out = out.format(*s)
        return out

    def __str__(self):
        return repr(self)

    # NOTE(review): the padding inside the string literals of `_cstr` is
    # reproduced as seen; confirm it against the intended display layout.
    def _cstr(self, header=' coordinates: ... \n', print_size=True):
        # Build the text description of the coordinates, one section per
        # named dimension.
        txt = ''
        for dim in self.names:
            coord = getattr(self, dim)

            if coord:
                dimension = f' DIMENSION `{dim}`'
                for k, v in self.references.items():
                    if dim == v:
                        # reference to this dimension
                        dimension += f'=`{k}`'
                txt += dimension + '\n'

                if isinstance(coord, CoordSet):
                    if not coord.is_empty:
                        if print_size:
                            txt += f'{coord[0]._str_shape().rstrip()}\n'

                        coord._html_output = self._html_output
                        for dim_s in coord.names:
                            c = getattr(coord, dim_s)
                            txt += f' ({dim_s}) ...\n'
                            c._html_output = self._html_output
                            sub = c._cstr(header=' coordinates: ... \n',
                                          print_size=False)
                            txt += f"{sub}\n"

                elif not coord.is_empty:
                    # coordinates if available
                    coord._html_output = self._html_output
                    txt += '{}\n'.format(
                        coord._cstr(header=header, print_size=print_size))

        txt = txt.rstrip()  # remove the trailing '\n'

        if not self._html_output:
            return colored_output(txt.rstrip())
        else:
            return txt.rstrip()

    def _repr_html_(self):
        return convert_to_html(self)

    def __deepcopy__(self, memo):
        coords = self.__class__(tuple(
            cpy.deepcopy(ax, memo=memo) for ax in self), keepnames=True)
        coords.name = self.name
        coords._is_same_dim = self._is_same_dim
        coords._default = self._default
        return coords

    def __copy__(self):
        coords = self.__class__(tuple(cpy.copy(ax) for ax in self),
                                keepnames=True)
        # name must be changed
        coords.name = self.name
        # and is_same_dim and default for coordset
        coords._is_same_dim = self._is_same_dim
        coords._default = self._default
        return coords

    def __eq__(self, other):
        if other is None:
            return False
        try:
            return self._coords == other._coords
        except Exception:
            # anything that cannot be compared is considered different
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    # ------------------------------------------------------------------
    # Events
    # ------------------------------------------------------------------
    def _coords_update(self, change):
        # when notified that a coord name have been updated
        self._updated = True

    @observe(All)
    def _anytrait_changed(self, change):
        # ex: change {
        #   'owner': object, # The HasTraits instance
        #   'new': 6, # The new value
        #   'old': 5, # The old value
        #   'name': "foo", # The name of the changed trait
        #   'type': 'change', # The event type of the notification, usually 'change'
        # }
        if change.name == '_updated' and change.new:
            self._updated = False  # reset
class SqlMagic(Magics, Configurable):
    """Runs SQL statement on a database, specified by SQLAlchemy connect string.

    Provides the %%sql magic."""

    # ------------------------------------------------------------------
    # Configurable options (all exposed through ``%config SqlMagic``)
    # ------------------------------------------------------------------
    autolimit = Int(
        0,
        config=True,
        help="Automatically limit the size of the returned result sets")
    style = Unicode(
        'DEFAULT',
        config=True,
        help=
        "Set the table printing style to any of prettytable's defined styles (currently DEFAULT, MSWORD_FRIENDLY, PLAIN_COLUMNS, RANDOM)"
    )
    short_errors = Bool(
        True,
        config=True,
        help="Don't display the full traceback on SQL Programming Error")
    displaylimit = Int(
        0,
        config=True,
        help=
        "Automatically limit the number of rows displayed (full result set is still stored)"
    )
    autopandas = Bool(
        False,
        config=True,
        help="Return Pandas DataFrames instead of regular result sets")
    column_local_vars = Bool(
        False,
        config=True,
        help="Return data into local variables from column names")
    feedback = Bool(True,
                    config=True,
                    help="Print number of rows affected by DML")
    dsn_filename = Unicode('odbc.ini',
                           config=True,
                           help="Path to DSN file. "
                           "When the first argument is of the form [section], "
                           "a sqlalchemy connection string is formed from the "
                           "matching section in the DSN file.")

    def __init__(self, shell):
        """Initialise both base classes from ``shell`` and register for %config."""
        Configurable.__init__(self, config=shell.config)
        Magics.__init__(self, shell=shell)

        # Add ourself to the list of module configurable via %config
        self.shell.configurables.append(self)

    @needs_local_scope
    @line_magic('sql')
    @cell_magic('sql')
    def execute(self, line, cell='', local_ns=None):
        """Runs SQL statement against a database, specified by SQLAlchemy connect string.

        If no database connection has been established, first word
        should be a SQLAlchemy connection string, or the user@db name
        of an established connection.

        Examples::

          %%sql postgresql://me:mypw@localhost/mydb
          SELECT * FROM mytable

          %%sql me@mydb
          DELETE FROM mytable

          %%sql
          DROP TABLE mytable

        SQLAlchemy connect string syntax examples:

          postgresql://me:mypw@localhost/mydb
          sqlite://
          mysql+pymysql://me:mypw@localhost/mydb

        Parameters
        ----------
        line : str
            Text following ``%sql`` / ``%%sql`` on the magic line.
        cell : str, optional
            Cell body when used as a cell magic.
        local_ns : dict, optional
            Local namespace of the caller; its entries override the shell
            user namespace when resolving bind variables.

        Returns
        -------
        The query result, the message returned by ``_persist_dataframe``
        for a ``persist`` statement, or ``None`` when the columns were
        pushed into the user namespace (``column_local_vars``) or a
        database error was printed (``short_errors``).
        """
        # save globals and locals so they can be referenced in bind vars
        user_ns = self.shell.user_ns.copy()
        if local_ns:
            user_ns.update(local_ns)

        parsed = sql.parse.parse('%s\n%s' % (line, cell), self)
        conn = sql.connection.Connection.get(parsed['connection'])
        first_word = parsed['sql'].split(None, 1)[:1]
        if first_word and first_word[0].lower() == 'persist':
            return self._persist_dataframe(parsed['sql'], conn, user_ns)

        try:
            result = sql.run.run(conn, parsed['sql'], self, user_ns)

            # ``not isinstance`` (and not the bitwise ``~``, which is always
            # truthy on a bool) so that plain string results are returned
            # instead of being treated as a result set.
            if (result and not isinstance(result, str)
                    and self.column_local_vars):
                # Instead of returning values, set variables directly in the
                # users namespace. Variable names given by column names

                if self.autopandas:
                    keys = result.keys()
                else:
                    keys = result.keys
                    result = result.dict()

                if self.feedback:
                    print('Returning data to local variables [{}]'.format(
                        ', '.join(keys)))

                self.shell.user_ns.update(result)

                return None
            else:
                # Return results into the default ipython _ variable
                return result

        except (ProgrammingError, OperationalError) as e:
            # Sqlite apparently return all errors as OperationalError :/
            if self.short_errors:
                print(e)
            else:
                raise

    # Leading run of characters accepted as a table name by _persist_dataframe
    legal_sql_identifier = re.compile(r'^[A-Za-z0-9#_$]+')

    def _persist_dataframe(self, raw, conn, user_ns):
        """Write the DataFrame/Series named in ``raw`` to a table via ``conn``.

        Parameters
        ----------
        raw : str
            The statement text, expected to be ``persist <DataFrameName>``.
        conn
            Connection object; ``conn.session.engine`` is handed to
            ``to_sql``.
        user_ns : dict
            Namespace in which the frame name is evaluated.

        Returns
        -------
        str
            ``'Persisted <table_name>'``.

        Raises
        ------
        ImportError
            If pandas is not available.
        SyntaxError
            If ``raw`` does not consist of exactly two words.
        TypeError
            If the evaluated name is neither a DataFrame nor a Series.
        ValueError
            If no legal table name can be derived from the frame name.
        """
        if not DataFrame:
            raise ImportError("Must `pip install pandas` to use DataFrames")
        pieces = raw.split()
        if len(pieces) != 2:
            raise SyntaxError(
                "Format: %sql [connection] persist <DataFrameName>")
        frame_name = pieces[1].strip(';')
        # NOTE(review): eval() executes arbitrary text typed after `persist`
        # in the user's namespace; acceptable only because the input comes
        # from the interactive user. Do not feed untrusted text to this.
        frame = eval(frame_name, user_ns)
        if not isinstance(frame, (DataFrame, Series)):
            raise TypeError('%s is not a Pandas DataFrame or Series' %
                            frame_name)
        match = self.legal_sql_identifier.search(frame_name.lower())
        if match is None:
            # previously surfaced as an opaque AttributeError on None
            raise ValueError('Cannot derive a legal SQL table name from %r' %
                             frame_name)
        table_name = match.group(0)
        frame.to_sql(table_name, conn.session.engine)
        return 'Persisted %s' % table_name
class Repo2Docker(Application):
    """An application for converting git repositories to docker images"""

    name = "jupyter-repo2docker"
    version = __version__
    description = __doc__

    @default("log_level")
    def _default_log_level(self):
        """The application's default log level"""
        return logging.INFO

    git_workdir = Unicode(
        None,
        config=True,
        allow_none=True,
        help="""
        Working directory to use for check out of git repositories.

        The default is to use the system's temporary directory. Should be
        somewhere ephemeral, such as /tmp.
        """,
    )

    subdir = Unicode(
        "",
        config=True,
        help="""
        Subdirectory of the git repository to examine.

        Defaults to ''.
        """,
    )

    cache_from = List(
        [],
        config=True,
        help="""
        List of images to try & re-use cached image layers from.

        Docker only tries to re-use image layers from images built locally,
        not pulled from a registry. We can ask it to explicitly re-use layers
        from non-locally built images by through the 'cache_from' parameter.
        """,
    )

    buildpacks = List(
        [
            LegacyBinderDockerBuildPack,
            DockerBuildPack,
            JuliaProjectTomlBuildPack,
            JuliaRequireBuildPack,
            NixBuildPack,
            RBuildPack,
            CondaBuildPack,
            PipfileBuildPack,
            PythonBuildPack,
        ],
        config=True,
        help="""
        Ordered list of BuildPacks to try when building a git repository.
        """,
    )

    extra_build_kwargs = Dict(
        {},
        help="""
        extra kwargs to limit CPU quota when building a docker image.
        Dictionary that allows the user to set the desired runtime flag
        to configure the amount of access to CPU resources your container has.
        Reference https://docs.docker.com/config/containers/resource_constraints/#cpu
        """,
        config=True,
    )

    extra_run_kwargs = Dict(
        {},
        help="""
        extra kwargs to limit CPU quota when running a docker image.
        Dictionary that allows the user to set the desired runtime flag
        to configure the amount of access to CPU resources your container has.
        Reference https://docs.docker.com/config/containers/resource_constraints/#cpu
        """,
        config=True,
    )

    default_buildpack = Any(
        PythonBuildPack,
        config=True,
        help="""
        The default build pack to use when no other buildpacks are found.
        """,
    )

    # Git is our content provider of last resort. This is to maintain the
    # old behaviour when git and local directories were the only supported
    # content providers. We can detect local directories from the path, but
    # detecting if something will successfully `git clone` is very hard if all
    # you can do is look at the path/URL to it.
    content_providers = List(
        [
            contentproviders.Local,
            contentproviders.Zenodo,
            contentproviders.Figshare,
            contentproviders.Dataverse,
            contentproviders.Hydroshare,
            contentproviders.Swhid,
            contentproviders.Mercurial,
            contentproviders.Git,
        ],
        config=True,
        help="""
        Ordered list by priority of ContentProviders to try in turn to fetch
        the contents specified by the user.
        """,
    )

    build_memory_limit = ByteSpecification(
        0,
        help="""
        Total memory that can be used by the docker image building process.

        Set to 0 for no limits.
        """,
        config=True,
    )

    volumes = Dict(
        {},
        help="""
        Volumes to mount when running the container.

        Only used when running, not during build process!

        Use a key-value pair, with the key being the volume source &
        value being the destination volume.

        Both source and destination can be relative. Source is resolved
        relative to the current working directory on the host, and
        destination is resolved relative to the working directory of the
        image - ($HOME by default)
        """,
        config=True,
    )

    user_id = Int(
        help="""
        UID of the user to create inside the built image.

        Should be a uid that is not currently used by anything in the image.
        Defaults to uid of currently running user, since that is the most
        common case when running r2d manually.

        Might not affect Dockerfile builds.
        """,
        config=True,
    )

    @default("user_id")
    def _user_id_default(self):
        """
        Default user_id to current running user.
        """
        return os.geteuid()

    user_name = Unicode(
        "jovyan",
        help="""
        Username of the user to create inside the built image.

        Should be a username that is not currently used by anything in the
        image, and should conform to the restrictions on user names for Linux.

        Defaults to username of currently running user, since that is the most
        common case when running repo2docker manually.
        """,
        config=True,
    )

    @default("user_name")
    def _user_name_default(self):
        """
        Default user_name to current running user.
        """
        return getpass.getuser()

    appendix = Unicode(
        config=True,
        help="""
        Appendix of Dockerfile commands to run at the end of the build.

        Can be used to customize the resulting image after all
        standard build steps finish.
        """,
    )

    json_logs = Bool(
        False,
        help="""
        Log output in structured JSON format.

        Useful when stdout is consumed by other tools
        """,
        config=True,
    )

    repo = Unicode(
        ".",
        help="""
        Specification of repository to build image for.

        Could be local path or git URL.
        """,
        config=True,
    )

    ref = Unicode(
        None,
        help="""
        Git ref that should be built.

        If repo is a git repository, this ref is checked out
        in a local clone before repository is built.
        """,
        config=True,
        allow_none=True,
    )

    swh_token = Unicode(
        None,
        help="""
        Token to use authenticated SWH API access.

        If unset, default to unauthenticated (limited) usage of the Software
        Heritage API.
        """,
        config=True,
        allow_none=True,
    )

    cleanup_checkout = Bool(
        False,
        help="""
        Delete source repository after building is done.

        Useful when repo2docker is doing the git cloning
        """,
        config=True,
    )

    output_image_spec = Unicode(
        "",
        help="""
        Docker Image name:tag to tag the built image with.

        Required parameter.
        """,
        config=True,
    )

    push = Bool(
        False,
        help="""
        Set to true to push docker image after building
        """,
        config=True,
    )

    run = Bool(
        False,
        help="""
        Run docker image after building
        """,
        config=True,
    )

    # FIXME: Refactor class to be able to do --no-build without needing
    #        deep support for it inside other code
    dry_run = Bool(
        False,
        help="""
        Do not actually build the docker image, just simulate it.
        """,
        config=True,
    )

    # FIXME: Refactor classes to separate build & run steps
    run_cmd = List(
        [],
        help="""
        Command to run when running the container

        When left empty, a jupyter notebook is run.
        """,
        config=True,
    )

    all_ports = Bool(
        False,
        help="""
        Publish all declared ports from container whiel running.

        Equivalent to -P option to docker run
        """,
        config=True,
    )

    ports = Dict(
        {},
        help="""
        Port mappings to establish when running the container.

        Equivalent to -p {key}:{value} options to docker run.
        {key} refers to port inside container, and {value}
        refers to port / host:port in the host
        """,
        config=True,
    )

    environment = List(
        [],
        help="""
        Environment variables to set when running the built image.

        Each item must be a string formatted as KEY=VALUE
        """,
        config=True,
    )

    target_repo_dir = Unicode(
        "",
        help="""
        Path inside the image where contents of the repositories are copied to,
        and where all the build operations (such as postBuild) happen.

        Defaults to ${HOME} if not set
        """,
        config=True,
    )

    engine = Unicode(
        "docker",
        config=True,
        help="""
        Name of the container engine.

        Defaults to 'docker'.
        """,
    )

    def get_engine(self):
        """Return an instance of the container engine.

        The engine class is looked up by ``self.engine`` in the
        "repo2docker.engines" entry-point group.
        Currently no arguments are passed to the engine constructor.

        Raises
        ------
        ContainerEngineException
            If no entry point is registered under ``self.engine``.
        """
        engines = entrypoints.get_group_named("repo2docker.engines")
        try:
            entry = engines[self.engine]
        except KeyError:
            raise ContainerEngineException(
                "Container engine '{}' not found. Available engines: {}".
                format(self.engine, ",".join(engines.keys())))
        engine_class = entry.load()
        return engine_class(parent=self)

    def fetch(self, url, ref, checkout_path):
        """Fetch the contents of `url` and place it in `checkout_path`.

        The `ref` parameter specifies what "version" of the contents should be
        fetched. In the case of a git repository `ref` is the SHA-1 of a commit.

        Iterate through possible content providers until a valid provider,
        based on URL, is found.

        When ``output_image_spec`` is empty it is derived here from the repo,
        the subdir and the provider's content id (or the current time).

        Raises
        ------
        ValueError
            If none of the configured content providers recognises `url`.
        """
        picked_content_provider = None
        for ContentProvider in self.content_providers:
            cp = ContentProvider()
            spec = cp.detect(url, ref=ref)
            if spec is not None:
                picked_content_provider = cp
                self.log.info("Picked {cp} content "
                              "provider.\n".format(cp=cp.__class__.__name__))
                break

        if picked_content_provider is None:
            message = ("No matching content provider found for "
                       "{url}.".format(url=url))
            self.log.error(message)
            # Without a provider there is nothing to fetch; continuing would
            # only fail later with an AttributeError on None.
            raise ValueError(message)

        swh_token = self.config.get("swh_token", self.swh_token)
        if swh_token and isinstance(picked_content_provider,
                                    contentproviders.Swhid):
            picked_content_provider.set_auth_token(swh_token)

        for log_line in picked_content_provider.fetch(
                spec, checkout_path, yield_output=self.json_logs):
            self.log.info(log_line, extra=dict(phase="fetching"))

        if not self.output_image_spec:
            image_spec = "r2d" + self.repo
            # if we are building from a subdirectory include that in the
            # image name so we can tell builds from different sub-directories
            # apart.
            if self.subdir:
                image_spec += self.subdir
            if picked_content_provider.content_id is not None:
                image_spec += picked_content_provider.content_id
            else:
                image_spec += str(int(time.time()))
            self.output_image_spec = escapism.escape(image_spec,
                                                     escape_char="-").lower()

    def json_excepthook(self, etype, evalue, traceback):
        """Called on an uncaught exception when using json logging

        Avoids non-JSON output on errors when using --json-logs
        """
        self.log.error(
            "Error during build: %s",
            evalue,
            exc_info=(etype, evalue, traceback),
            extra=dict(phase="failed"),
        )

    def initialize(self):
        """Init repo2docker configuration before start

        Sets up logging (JSON or plain) and validates option combinations.

        Raises
        ------
        ValueError
            If ``dry_run`` is combined with ``run`` or ``push``, or if
            ``volumes`` are given without ``run``.
        """
        # FIXME: Remove this function, move it to setters / traitlet reactors
        if self.json_logs:
            # register JSON excepthook to avoid non-JSON output on errors
            sys.excepthook = self.json_excepthook
            # Need to reset existing handlers, or we repeat messages
            logHandler = logging.StreamHandler()
            formatter = jsonlogger.JsonFormatter()
            logHandler.setFormatter(formatter)
            self.log = logging.getLogger("repo2docker")
            self.log.handlers = []
            self.log.addHandler(logHandler)
            self.log.setLevel(self.log_level)
        else:
            # due to json logger stuff above,
            # our log messages include carriage returns, newlines, etc.
            # remove the additional newline from the stream handler
            self.log.handlers[0].terminator = ""
            # We don't want a [Repo2Docker] on all messages
            self.log.handlers[0].formatter = logging.Formatter(
                fmt="%(message)s")

        if self.dry_run and (self.run or self.push):
            raise ValueError(
                "Cannot push or run image if we are not building it")

        if self.volumes and not self.run:
            raise ValueError("Cannot mount volumes if container is not run")

    def push_image(self):
        """Push docker image to registry

        Raises
        ------
        ImageLoadError
            If the engine reports an ``error`` entry while pushing.
        """
        client = self.get_engine()
        # Build a progress setup for each layer, and only emit per-layer
        # info every 1.5s
        progress_layers = {}
        layers = {}
        last_emit_time = time.time()
        for chunk in client.push(self.output_image_spec):
            if client.string_output:
                self.log.info(chunk, extra=dict(phase="pushing"))
                continue
            # else this is Docker output

            # each chunk can be one or more lines of json events
            # split lines here in case multiple are delivered at once
            for line in chunk.splitlines():
                line = line.decode("utf-8", errors="replace")
                try:
                    progress = json.loads(line)
                except Exception:
                    self.log.warning("Not a JSON progress line: %r", line)
                    continue
                if "error" in progress:
                    self.log.error(progress["error"],
                                   extra=dict(phase="failed"))
                    raise ImageLoadError(progress["error"])
                if "id" not in progress:
                    continue
                # deprecated truncated-progress data
                if "progressDetail" in progress and progress["progressDetail"]:
                    progress_layers[
                        progress["id"]] = progress["progressDetail"]
                else:
                    progress_layers[progress["id"]] = progress["status"]
                # include full progress data for each layer in 'layers' data
                layers[progress["id"]] = progress
                if time.time() - last_emit_time > 1.5:
                    self.log.info(
                        "Pushing image\n",
                        extra=dict(progress=progress_layers,
                                   layers=layers,
                                   phase="pushing"),
                    )
                    last_emit_time = time.time()
        self.log.info(
            "Successfully pushed {}".format(self.output_image_spec),
            extra=dict(phase="pushing"),
        )

    def run_image(self):
        """Run docker container from built image

        and wait for it to finish.
        """
        container = self.start_container()
        self.wait_for_container(container)

    def start_container(self):
        """Start docker container from built image

        Stores the resolved host name on ``self.hostname`` and the port
        mapping on ``self.ports``; when no ``run_cmd`` is configured a free
        port is picked and stored on ``self.port``.

        Returns running container
        """
        client = self.get_engine()

        docker_host = os.environ.get("DOCKER_HOST")
        if docker_host:
            host_name = urlparse(docker_host).hostname
        else:
            host_name = "127.0.0.1"
        self.hostname = host_name

        if not self.run_cmd:
            port = str(self._get_free_port())
            self.port = port
            # To use the option --NotebookApp.custom_display_url
            # make sure the base-notebook image is updated:
            # docker pull jupyter/base-notebook
            run_cmd = [
                "jupyter",
                "notebook",
                "--ip",
                "0.0.0.0",
                "--port",
                port,
                "--NotebookApp.custom_display_url=http://{}:{}".format(
                    host_name, port),
            ]
            ports = {"%s/tcp" % port: port}
        else:
            # run_cmd given by user, if port is also given then pass it on
            run_cmd = self.run_cmd
            if self.ports:
                ports = self.ports
            else:
                ports = {}
        # store ports on self so they can be retrieved in tests
        self.ports = ports

        container_volumes = {}
        if self.volumes:
            image = client.inspect_image(self.output_image_spec)
            image_workdir = image.config["WorkingDir"]

            for k, v in self.volumes.items():
                container_volumes[os.path.abspath(k)] = {
                    # relative destinations are resolved against the image's
                    # working directory
                    "bind": v if v.startswith("/") else os.path.join(
                        image_workdir, v),
                    "mode": "rw",
                }

        run_kwargs = dict(
            publish_all_ports=self.all_ports,
            ports=ports,
            command=run_cmd,
            volumes=container_volumes,
            environment=self.environment,
        )

        run_kwargs.update(self.extra_run_kwargs)

        container = client.run(self.output_image_spec, **run_kwargs)

        # poll until the engine reports something other than "created"
        while container.status == "created":
            time.sleep(0.5)
            container.reload()

        return container

    def wait_for_container(self, container):
        """Wait for a container to finish

        Displaying logs while it's running. Afterwards the container is
        removed, and the process exits with the container's exit code when
        that code is non-zero.
        """
        last_timestamp = None
        try:
            for line in container.logs(stream=True, timestamps=True):
                line = line.decode("utf-8")
                last_timestamp, line = line.split(" ", maxsplit=1)
                self.log.info(line, extra=dict(phase="running"))

        finally:
            container.reload()
            if container.status == "running":
                self.log.info("Stopping container...\n",
                              extra=dict(phase="running"))
                container.kill()
            exit_code = container.exitcode

            container.wait()

            self.log.info("Container finished running.\n".upper(),
                          extra=dict(phase="running"))
            # are there more logs? Let's send them back too
            late_logs = container.logs(since=last_timestamp).decode("utf-8")
            for line in late_logs.split("\n"):
                self.log.debug(line + "\n", extra=dict(phase="running"))

            container.remove()
            if exit_code:
                sys.exit(exit_code)

    def _get_free_port(self):
        """
        Hacky method to get a free random port on local host
        """
        import socket

        # the context manager closes the socket even if bind() raises
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("", 0))
            return s.getsockname()[1]

    def find_image(self):
        """Return True if an image tagged ``<output_image_spec>:latest`` exists.

        Always returns False for dry runs, without contacting the engine.
        """
        # if this is a dry run it is Ok for dockerd to be unreachable so we
        # always return False for dry runs.
        if self.dry_run:
            return False
        # check if we already have an image for this content
        client = self.get_engine()
        wanted_tag = self.output_image_spec + ":latest"
        return any(tag == wanted_tag
                   for image in client.images()
                   for tag in image.tags)

    def build(self):
        """
        Build docker image

        Fetches the repository (unless ``self.repo`` is a local directory,
        which is then used in place), picks the first buildpack whose
        ``detect()`` succeeds (falling back to ``default_buildpack``) and
        either prints the rendered output (dry run) or builds the image.

        Raises
        ------
        FileNotFoundError
            If ``subdir`` is set but does not exist in the checkout.
        ValueError
            If ``user_id`` is 0 for a real build.
        BuildError
            If the build output contains an ``error`` entry.
        """
        # Check if r2d can connect to docker daemon
        if not self.dry_run:
            try:
                docker_client = self.get_engine()
            except ContainerEngineException as e:
                self.log.error("\nContainer engine initialization error: %s\n",
                               e)
                self.exit(1)

        # If the source to be executed is a directory, continue using the
        # directory. In the case of a local directory, it is used as both the
        # source and target. Reusing a local directory seems better than
        # making a copy of it as it might contain large files that would be
        # expensive to copy.
        if os.path.isdir(self.repo):
            checkout_path = self.repo
        else:
            if self.git_workdir is None:
                checkout_path = tempfile.mkdtemp(prefix="repo2docker")
            else:
                checkout_path = self.git_workdir

        try:
            self.fetch(self.repo, self.ref, checkout_path)

            if self.find_image():
                self.log.info("Reusing existing image ({}), not "
                              "building.".format(self.output_image_spec))
                # no need to build, so skip to the end by `return`ing here
                # this will still execute the finally clause and let's us
                # avoid having to indent the build code by an extra level
                return

            if self.subdir:
                # NOTE(review): from here on checkout_path points at the
                # subdirectory, so the cleanup in `finally` only removes the
                # subdirectory, not the whole checkout — confirm intended.
                checkout_path = os.path.join(checkout_path, self.subdir)
                if not os.path.isdir(checkout_path):
                    self.log.error(
                        "Subdirectory %s does not exist",
                        self.subdir,
                        extra=dict(phase="failure"),
                    )
                    raise FileNotFoundError(
                        "Could not find {}".format(checkout_path))

            with chdir(checkout_path):
                for BP in self.buildpacks:
                    bp = BP()
                    if bp.detect():
                        picked_buildpack = bp
                        break
                else:
                    picked_buildpack = self.default_buildpack()

                picked_buildpack.appendix = self.appendix
                # Add metadata labels
                picked_buildpack.labels["repo2docker.version"] = self.version
                repo_label = "local" if os.path.isdir(self.repo) else self.repo
                picked_buildpack.labels["repo2docker.repo"] = repo_label
                picked_buildpack.labels["repo2docker.ref"] = self.ref

                if self.dry_run:
                    print(picked_buildpack.render())
                else:
                    self.log.debug(picked_buildpack.render(),
                                   extra=dict(phase="building"))
                    if self.user_id == 0:
                        raise ValueError(
                            "Root as the primary user in the image is not permitted."
                        )

                    build_args = {
                        "NB_USER": self.user_name,
                        "NB_UID": str(self.user_id),
                    }
                    if self.target_repo_dir:
                        build_args["REPO_DIR"] = self.target_repo_dir
                    # Report the buildpack that is actually used: the loop
                    # variable `bp` would name the last buildpack probed when
                    # the default buildpack was picked.
                    self.log.info(
                        "Using %s builder\n",
                        picked_buildpack.__class__.__name__,
                        extra=dict(phase="building"),
                    )

                    for l in picked_buildpack.build(
                            docker_client,
                            self.output_image_spec,
                            self.build_memory_limit,
                            build_args,
                            self.cache_from,
                            self.extra_build_kwargs,
                    ):
                        if docker_client.string_output:
                            self.log.info(l, extra=dict(phase="building"))
                        # else this is Docker output
                        elif "stream" in l:
                            self.log.info(l["stream"],
                                          extra=dict(phase="building"))
                        elif "error" in l:
                            self.log.info(l["error"],
                                          extra=dict(phase="failure"))
                            raise BuildError(l["error"])
                        elif "status" in l:
                            self.log.info("Fetching base image...\r",
                                          extra=dict(phase="building"))
                        else:
                            self.log.info(json.dumps(l),
                                          extra=dict(phase="building"))

        finally:
            # Cleanup checkout if necessary
            if self.cleanup_checkout:
                shutil.rmtree(checkout_path, ignore_errors=True)

    def start(self):
        """Build the image, then push and/or run it as configured."""
        self.build()

        if self.push:
            self.push_image()

        if self.run:
            self.run_image()
class Widget(LoggingHasTraits): #------------------------------------------------------------------------- # Class attributes #------------------------------------------------------------------------- _widget_construction_callback = None # widgets is a dictionary of all active widget objects widgets = {} # widget_types is a registry of widgets by module, version, and name: widget_types = WidgetRegistry() @classmethod def close_all(cls): for widget in list(cls.widgets.values()): widget.close() @staticmethod def on_widget_constructed(callback): """Registers a callback to be called when a widget is constructed. The callback must have the following signature: callback(widget)""" Widget._widget_construction_callback = callback @staticmethod def _call_widget_constructed(widget): """Static method, called when a widget is constructed.""" if Widget._widget_construction_callback is not None and callable(Widget._widget_construction_callback): Widget._widget_construction_callback(widget) @staticmethod def handle_comm_opened(comm, msg): """Static method, called when a widget is constructed.""" version = msg.get('metadata', {}).get('version', '') if version.split('.')[0] != PROTOCOL_VERSION_MAJOR: raise ValueError("Incompatible widget protocol versions: received version %r, expected version %r"%(version, __protocol_version__)) data = msg['content']['data'] state = data['state'] # Find the widget class to instantiate in the registered widgets widget_class = Widget.widget_types.get(state['_model_module'], state['_model_module_version'], state['_model_name'], state['_view_module'], state['_view_module_version'], state['_view_name']) widget = widget_class(comm=comm) if 'buffer_paths' in data: _put_buffers(state, data['buffer_paths'], msg['buffers']) widget.set_state(state) @staticmethod def get_manager_state(drop_defaults=False, widgets=None): """Returns the full state for a widget manager for embedding :param drop_defaults: when True, it will not include default value :param 
widgets: list with widgets to include in the state (or all widgets when None) :return: """ state = {} if widgets is None: widgets = Widget.widgets.values() for widget in widgets: state[widget.model_id] = widget._get_embed_state(drop_defaults=drop_defaults) return {'version_major': 2, 'version_minor': 0, 'state': state} def _get_embed_state(self, drop_defaults=False): state = { 'model_name': self._model_name, 'model_module': self._model_module, 'model_module_version': self._model_module_version } model_state, buffer_paths, buffers = _remove_buffers(self.get_state(drop_defaults=drop_defaults)) state['state'] = model_state if len(buffers) > 0: state['buffers'] = [{'encoding': 'base64', 'path': p, 'data': standard_b64encode(d).decode('ascii')} for p, d in zip(buffer_paths, buffers)] return state def get_view_spec(self): return dict(version_major=2, version_minor=0, model_id=self._model_id) #------------------------------------------------------------------------- # Traits #------------------------------------------------------------------------- _model_name = Unicode('WidgetModel', help="Name of the model.", read_only=True).tag(sync=True) _model_module = Unicode('@jupyter-widgets/base', help="The namespace for the model.", read_only=True).tag(sync=True) _model_module_version = Unicode(__jupyter_widgets_base_version__, help="A semver requirement for namespace version containing the model.", read_only=True).tag(sync=True) _view_name = Unicode(None, allow_none=True, help="Name of the view.").tag(sync=True) _view_module = Unicode(None, allow_none=True, help="The namespace for the view.").tag(sync=True) _view_module_version = Unicode('', help="A semver requirement for the namespace version containing the view.").tag(sync=True) _view_count = Int(None, allow_none=True, help="EXPERIMENTAL: The number of views of the model displayed in the frontend. This attribute is experimental and may change or be removed in the future. None signifies that views will not be tracked. 
Set this to 0 to start tracking view creation/deletion.").tag(sync=True) comm = Instance('ipykernel.comm.Comm', allow_none=True) keys = List(help="The traits which are synced.") @default('keys') def _default_keys(self): return [name for name in self.traits(sync=True)] _property_lock = Dict() _holding_sync = False _states_to_send = Set() _display_callbacks = Instance(CallbackDispatcher, ()) _msg_callbacks = Instance(CallbackDispatcher, ()) #------------------------------------------------------------------------- # (Con/de)structor #------------------------------------------------------------------------- def __init__(self, **kwargs): """Public constructor""" self._model_id = kwargs.pop('model_id', None) super(Widget, self).__init__(**kwargs) Widget._call_widget_constructed(self) self.open() def __del__(self): """Object disposal""" self.close() #------------------------------------------------------------------------- # Properties #------------------------------------------------------------------------- def open(self): """Open a comm to the frontend if one isn't already open.""" if self.comm is None: state, buffer_paths, buffers = _remove_buffers(self.get_state()) args = dict(target_name='jupyter.widget', data={'state': state, 'buffer_paths': buffer_paths}, buffers=buffers, metadata={'version': __protocol_version__} ) if self._model_id is not None: args['comm_id'] = self._model_id self.comm = Comm(**args) @observe('comm') def _comm_changed(self, change): """Called when the comm is changed.""" if change['new'] is None: return self._model_id = self.model_id self.comm.on_msg(self._handle_msg) Widget.widgets[self.model_id] = self @property def model_id(self): """Gets the model id of this widget. 
If a Comm doesn't exist yet, a Comm will be created automagically.""" return self.comm.comm_id #------------------------------------------------------------------------- # Methods #------------------------------------------------------------------------- def close(self): """Close method. Closes the underlying comm. When the comm is closed, all of the widget views are automatically removed from the front-end.""" if self.comm is not None: Widget.widgets.pop(self.model_id, None) self.comm.close() self.comm = None self._ipython_display_ = None def send_state(self, key=None): """Sends the widget state, or a piece of it, to the front-end, if it exists. Parameters ---------- key : unicode, or iterable (optional) A single property's name or iterable of property names to sync with the front-end. """ state = self.get_state(key=key) if len(state) > 0: state, buffer_paths, buffers = _remove_buffers(state) msg = {'method': 'update', 'state': state, 'buffer_paths': buffer_paths} self._send(msg, buffers=buffers) def get_state(self, key=None, drop_defaults=False): """Gets the widget state, or a piece of it. Parameters ---------- key : unicode or iterable (optional) A single property's name or iterable of property names to get. 
Returns ------- state : dict of states metadata : dict metadata for each field: {key: metadata} """ if key is None: keys = self.keys elif isinstance(key, string_types): keys = [key] elif isinstance(key, collections.Iterable): keys = key else: raise ValueError("key must be a string, an iterable of keys, or None") state = {} traits = self.traits() for k in keys: to_json = self.trait_metadata(k, 'to_json', self._trait_to_json) value = to_json(getattr(self, k), self) if not PY3 and isinstance(traits[k], Bytes) and isinstance(value, bytes): value = memoryview(value) if not drop_defaults or not self._compare(value, traits[k].default_value): state[k] = value return state def _is_numpy(self, x): return x.__class__.__name__ == 'ndarray' and x.__class__.__module__ == 'numpy' def _compare(self, a, b): if self._is_numpy(a) or self._is_numpy(b): import numpy as np return np.array_equal(a, b) else: return a == b def set_state(self, sync_data): """Called when a state is received from the front-end.""" # The order of these context managers is important. Properties must # be locked when the hold_trait_notification context manager is # released and notifications are fired. with self._lock_property(**sync_data), self.hold_trait_notifications(): for name in sync_data: if name in self.keys: from_json = self.trait_metadata(name, 'from_json', self._trait_from_json) self.set_trait(name, from_json(sync_data[name], self)) def send(self, content, buffers=None): """Sends a custom msg to the widget model in the front-end. Parameters ---------- content : dict Content of the message to send. buffers : list of binary buffers Binary buffers to send with message """ self._send({"method": "custom", "content": content}, buffers=buffers) def on_msg(self, callback, remove=False): """(Un)Register a custom msg receive callback. 
Parameters ---------- callback: callable callback will be passed three arguments when a message arrives:: callback(widget, content, buffers) remove: bool True if the callback should be unregistered.""" self._msg_callbacks.register_callback(callback, remove=remove) def on_displayed(self, callback, remove=False): """(Un)Register a widget displayed callback. Parameters ---------- callback: method handler Must have a signature of:: callback(widget, **kwargs) kwargs from display are passed through without modification. remove: bool True if the callback should be unregistered.""" self._display_callbacks.register_callback(callback, remove=remove) def add_traits(self, **traits): """Dynamically add trait attributes to the Widget.""" super(Widget, self).add_traits(**traits) for name, trait in traits.items(): if trait.get_metadata('sync'): self.keys.append(name) self.send_state(name) def notify_change(self, change): """Called when a property has changed.""" # Send the state to the frontend before the user-registered callbacks # are called. name = change['name'] if self.comm is not None and self.comm.kernel is not None: # Make sure this isn't information that the front-end just sent us. if name in self.keys and self._should_send_property(name, getattr(self, name)): # Send new state to front-end self.send_state(key=name) super(Widget, self).notify_change(change) def __repr__(self): return self._gen_repr_from_keys(self._repr_keys()) #------------------------------------------------------------------------- # Support methods #------------------------------------------------------------------------- @contextmanager def _lock_property(self, **properties): """Lock a property-value pair. The value should be the JSON state of the property. NOTE: This, in addition to the single lock for all state changes, is flawed. 
In the future we may want to look into buffering state changes back to the front-end.""" self._property_lock = properties try: yield finally: self._property_lock = {} @contextmanager def hold_sync(self): """Hold syncing any state until the outermost context manager exits""" if self._holding_sync is True: yield else: try: self._holding_sync = True yield finally: self._holding_sync = False self.send_state(self._states_to_send) self._states_to_send.clear() def _should_send_property(self, key, value): """Check the property lock (property_lock)""" to_json = self.trait_metadata(key, 'to_json', self._trait_to_json) if key in self._property_lock: # model_state, buffer_paths, buffers split_value = _remove_buffers({ key: to_json(value, self)}) split_lock = _remove_buffers({ key: self._property_lock[key]}) # A roundtrip conversion through json in the comparison takes care of # idiosyncracies of how python data structures map to json, for example # tuples get converted to lists. if (jsonloads(jsondumps(split_value[0])) == split_lock[0] and split_value[1] == split_lock[1] and _buffer_list_equal(split_value[2], split_lock[2])): return False if self._holding_sync: self._states_to_send.add(key) return False else: return True # Event handlers @_show_traceback def _handle_msg(self, msg): """Called when a msg is received from the front-end""" data = msg['content']['data'] method = data['method'] if method == 'update': if 'state' in data: state = data['state'] if 'buffer_paths' in data: _put_buffers(state, data['buffer_paths'], msg['buffers']) self.set_state(state) # Handle a state request. elif method == 'request_state': self.send_state() # Handle a custom msg from the front-end. elif method == 'custom': if 'content' in data: self._handle_custom_msg(data['content'], msg['buffers']) # Catch remainder. 
else: self.log.error('Unknown front-end to back-end widget msg with method "%s"' % method) def _handle_custom_msg(self, content, buffers): """Called when a custom msg is received.""" self._msg_callbacks(self, content, buffers) def _handle_displayed(self, **kwargs): """Called when a view has been displayed for this widget instance""" self._display_callbacks(self, **kwargs) @staticmethod def _trait_to_json(x, self): """Convert a trait value to json.""" return x @staticmethod def _trait_from_json(x, self): """Convert json values to objects.""" return x def _ipython_display_(self, **kwargs): """Called when `IPython.display.display` is called on the widget.""" if self._view_name is not None: plaintext = repr(self) if len(plaintext) > 110: plaintext = plaintext[:110] + '…' # The 'application/vnd.jupyter.widget-view+json' mimetype has not been registered yet. # See the registration process and naming convention at # http://tools.ietf.org/html/rfc6838 # and the currently registered mimetypes at # http://www.iana.org/assignments/media-types/media-types.xhtml. 
data = { 'text/plain': plaintext, 'application/vnd.jupyter.widget-view+json': { 'version_major': 2, 'version_minor': 0, 'model_id': self._model_id } } display(data, raw=True) self._handle_displayed(**kwargs) def _send(self, msg, buffers=None): """Sends a message to the model in the front-end.""" if self.comm is not None and self.comm.kernel is not None: self.comm.send(data=msg, buffers=buffers) def _repr_keys(self): traits = self.traits() for key in sorted(self.keys): # Exclude traits that start with an underscore if key[0] == '_': continue # Exclude traits who are equal to their default value value = getattr(self, key) trait = traits[key] if self._compare(value, trait.default_value): continue elif (isinstance(trait, (Container, Dict)) and trait.default_value == Undefined and (value is None or len(value) == 0)): # Empty container, and dynamic default will be empty continue yield key def _gen_repr_from_keys(self, keys): class_name = self.__class__.__name__ signature = ', '.join( '%s=%r' % (key, getattr(self, key)) for key in keys ) return '%s(%s)' % (class_name, signature)
class TriggerEffiencyGenerator(Tool):
    """Compute a per-run trigger efficiency from a run list and event files.

    ``start`` reads ``<input_path>/../runlist.txt``, counts the events that
    can be read from the matching file in ``input_path`` for every run and
    divides that count by the run-list column 5 value.  ``finish`` writes
    one ``<run-list column 7>\\t<efficiency>`` line per run to
    ``output_name``.
    """

    name = "TriggerEffiencyGenerator"
    description = "Generate the a pickle file of TriggerEffiency for " \
                  "either MC or data files."

    telescopes = Int(1, help='Telescopes to include from the event file. '
                             'Default = 1').tag(config=True)
    # `finish` writes plain tab-separated text, so the help says so.
    output_name = Unicode('trigger_efficiency',
                          help='Name of the output trigger efficiency '
                               'tab-separated text file').tag(config=True)
    input_path = Unicode(
        help='Path to directory containing data').tag(config=True)
    max_events = Int(
        1000, help='Maximum number of events to use').tag(config=True)
    # The description must be passed as `help=`: the second positional
    # argument of a trait type is `allow_none`, not the help text.
    plot_cam = Bool(
        False,
        help="enable plotting of individual camera").tag(config=True)
    use_true_pe = Bool(False, help="Use true mc p.e.").tag(config=True)

    # NOTE(review): `max_pe` points at a trait this class does not declare;
    # kept so existing command lines keep parsing -- confirm and remove.
    aliases = Dict(
        dict(input_path='TriggerEffiencyGenerator.input_path',
             output_name='TriggerEffiencyGenerator.output_name',
             max_events='TriggerEffiencyGenerator.max_events',
             clip_amplitude='CameraDL1Calibrator.clip_amplitude',
             radius='CameraDL1Calibrator.radius',
             max_pe='TriggerEffiencyGenerator.max_pe',
             T='TriggerEffiencyGenerator.telescopes',
             plot_cam='TriggerEffiencyGenerator.plot_cam',
             use_true_pe='TriggerEffiencyGenerator.use_true_pe'))
    classes = List([EventSourceFactory, CameraDL1Calibrator,
                    CameraCalibrator])

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.eventsource = None
        self.r1 = None
        self.dl0 = None
        self.dl1 = None
        self.cal = None
        # Parallel lists filled by `start` and written out by `finish`.
        self.trig_eff_array = []
        self.disc_array = []

    def setup(self):
        """Instantiate the calibration components from the tool config."""
        kwargs = dict(config=self.config, tool=self)
        self.dl0 = CameraDL0Reducer(**kwargs)
        self.dl1 = CameraDL1Calibrator(**kwargs)
        self.cal = CameraCalibrator()

    def start(self):
        """Count readable events per run and store the efficiencies.

        The run list is loaded column-wise; column 0 is the run number,
        column 5 the denominator of the efficiency and column 7 the value
        stored in ``disc_array``.  Files in ``input_path`` are sorted by
        name and matched to runs by position, so the n-th file name must
        contain the n-th run number; otherwise the tool exits.
        """
        run_list = np.loadtxt('%s/../runlist.txt' % self.input_path,
                              unpack=True)
        file_list = sorted(listdir(self.input_path))

        # `debug` is not defined in this class; presumably a module-level
        # flag -- TODO confirm.
        if debug:
            fig = plt.figure(1)
            fig.add_subplot(111)

        for n, run in enumerate(run_list[0]):
            run_id = str(int(run))
            n_expected = run_list[5][n]
            if run_id not in file_list[n]:
                print(run_id, file_list[n])
                print('check runlist.txt order, needs to be sorted?')
                exit()
            file_name = "%s/%s" % (self.input_path, file_list[n])
            print(file_name)

            n_trig = 0
            try:
                print('trying to open file')
                source = EventSourceFactory.produce(
                    input_url=file_name, max_events=self.max_events)
                for _ in tqdm(source):
                    n_trig += 1
            except FileNotFoundError:
                # Best effort: a missing file is recorded as zero triggers.
                print('file_not_found')

            efficiency = n_trig / n_expected
            print(run_list[7][n], n_trig, n_expected, efficiency)
            self.trig_eff_array.append(efficiency)
            self.disc_array.append(run_list[7][n])

        if debug:
            # Plot the stored efficiencies; the attribute is
            # `trig_eff_array` (there is no `trig_eff` attribute).
            plt.plot(self.disc_array, self.trig_eff_array)
            plt.show()
        plt.show()

    def finish(self):
        """Write the collected ``value<TAB>efficiency`` pairs to disk."""
        # The context manager closes the file even if a write fails.
        with open(self.output_name, 'w') as out_file:
            for disc, efficiency in zip(self.disc_array,
                                        self.trig_eff_array):
                out_file.write('%s\t%s\n' % (disc, efficiency))
        print('done')
class UrlAuthenticator(Authenticator):
    """
    Class for authenticating to jupyterhub against a remote URL.

    The login form data is POSTed as JSON to
    ``<server_address>:<server_port>/<login_route>``.  A response body that
    is a JSON object with a ``username`` key authenticates that user; any
    other outcome is treated as a failed login.
    """

    # config values
    # address of the server hosting the login service
    server_address = Unicode(
        default_value='http://127.0.0.1',
        config=True,
        help='Address of the server with the login route')

    # port the service is exposed on
    server_port = Int(
        default_value=8080,
        config=True,
        help='Port on which to contact login server',
    )

    # route to the service on the server_address:port; a leading slash is
    # optional, `do_request` normalises it
    login_route = Unicode(
        default_value='/login',
        config=True,
        help='Route for the login service (assumes leading slash)')

    @gen.coroutine
    def authenticate(self, handler, data):
        """
        Authenticate against a URL that provides an authentication service.

        Args:
            handler - the RequestHandler from Jupyter
            data - the data from the hub login form.

        Returns the user name on success, otherwise None.
        """
        resp = self.do_request(data)
        return self.process_response(resp)

    def do_request(self, data):
        """
        Send the request with the user creds to the logon server.

        Returns the raw response body (bytes), or None when the server
        rejects the request or cannot be reached.
        """
        # Tolerate a trailing slash on the address and a missing or
        # repeated leading slash on the route.
        url = '%s:%s/%s' % (self.server_address.rstrip('/'),
                            self.server_port,
                            self.login_route.lstrip('/'))

        # get an httprequest with the headers and such using the provided
        # data
        request = UrlAuthenticator.create_request(url, data)

        try:
            # The context manager closes the response for us.
            with urllib.request.urlopen(request) as response:
                return response.read()
        except urllib.error.HTTPError as err:
            # Non-2xx answer from the login service: failed login.
            self.log.warning('Login service at %s answered HTTP %s',
                             url, err.code)
        except urllib.error.URLError as err:
            # Service unreachable (refused connection, DNS failure, ...).
            # Treat it as a failed login instead of raising out of
            # `authenticate`.
            self.log.error('Could not reach login service at %s: %s',
                           url, err.reason)
        return None

    def process_response(self, resp):
        """
        Decide from the response body whether the user is authenticated.

        Returns the ``username`` value of the JSON object in *resp*, or
        None when *resp* is empty, is not valid JSON, is not a JSON
        object, or has no ``username`` key.
        """
        if not resp:
            return None
        try:
            payload = json.loads(resp.decode())
        except ValueError:
            # Covers both undecodable bytes and malformed JSON.
            self.log.warning('Login service returned a non-JSON response')
            return None
        if not isinstance(payload, dict):
            return None
        return payload.get('username', None)

    @staticmethod
    def create_request(url, data):
        """
        Make a Request object to hit the URL.

        url is the full url (constructed from address, port, and route
        values)
        data is the data from a POST to the login form of the hub

        The data is serialised to UTF-8 encoded JSON and sent as the
        request body, which makes the request a POST.
        """
        conttype = 'application/json; charset=UTF-8'
        jdata = json.dumps(data).encode('utf-8')
        headers = {
            'Content-Type': conttype,
            'Content-Length': len(jdata),
        }
        return urllib.request.Request(url, jdata, headers)
class TF_editor(widgets.DOMWidget):
    """Widget that edits a colour map and an opacity function.

    ``color_map`` is validated in strides of four values and
    ``opacity_function`` in strides of two; in both, the first value of
    each stride is rescaled so that the smallest becomes 0 and the largest
    becomes 1.
    """

    _view_name = Unicode('TransferFunctionView').tag(sync=True)
    _model_name = Unicode('TransferFunctionModel').tag(sync=True)
    _view_module = Unicode('k3d').tag(sync=True)
    _model_module = Unicode('k3d').tag(sync=True)
    _view_module_version = Unicode(version).tag(sync=True)
    _model_module_version = Unicode(version).tag(sync=True)

    # readonly (specified at creation)
    height = Int().tag(sync=True)

    # read-write
    color_map = Array(dtype=np.float32).tag(
        sync=True, **array_serialization_wrap('color_map'))
    opacity_function = Array(dtype=np.float32).tag(
        sync=True, **array_serialization_wrap('opacity_function'))

    def __init__(self, height, color_map, opacity_function, *args, **kwargs):
        # Forward keyword arguments to the widget base class instead of
        # silently dropping them.  Extra positional arguments are still
        # ignored, as before.
        super(TF_editor, self).__init__(**kwargs)

        self.height = height

        # Set both arrays before any change notification is fired.
        with self.hold_trait_notifications():
            self.color_map = color_map
            self.opacity_function = opacity_function

        self.outputs = []

    def display(self, **kwargs):
        """Render the widget inside a new Output widget and show it."""
        output = widgets.Output()

        with output:
            display(self, **kwargs)

        self.outputs.append(output)

        display(output)

    def close(self):
        """Clear every Output created by `display` and forget them."""
        for output in self.outputs:
            output.clear_output()

        self.outputs = []

    def __getitem__(self, name):
        return getattr(self, name)

    @staticmethod
    def _normalize_positions(values, stride):
        """Return *values* with every ``stride``-th entry rescaled to [0, 1].

        The input is returned untouched when it is a 0-d array, is empty,
        already spans exactly 0..1, or has all positions equal (rescaling
        would divide by zero).  Otherwise a rescaled copy is returned so
        the array handed in by the caller is not modified.
        """
        if values.shape == () or values.size == 0:
            return values

        positions = values[::stride]
        lowest, highest = np.min(positions), np.max(positions)

        if (lowest == 0.0 and highest == 1.0) or highest == lowest:
            return values

        normalized = values.copy()
        normalized[::stride] = (positions - lowest) / (highest - lowest)
        return normalized

    @validate('color_map')
    def _validate_color_map(self, proposal):
        return self._normalize_positions(proposal['value'], 4)

    @validate('opacity_function')
    def _validate_opacity_function(self, proposal):
        return self._normalize_positions(proposal['value'], 2)
class VoilaConfiguration(traitlets.config.Configurable):
    """Common configuration options between the server extension and the application."""

    allow_template_override = Enum(['YES', 'NOTEBOOK', 'NO'], 'YES', help='''
    Allow overriding the template (YES), or not (NO), or only from the notebook metadata.
    ''', config=True)
    allow_theme_override = Enum(['YES', 'NOTEBOOK', 'NO'], 'YES', help='''
    Allow overriding the theme (YES), or not (NO), or only from the notebook metadata.
    ''', config=True)
    template = Unicode(
        'lab',
        config=True,
        allow_none=True,
        help=('template name to be used by voila.'))
    resources = Dict(
        allow_none=True,
        config=True,
        help="""
        extra resources used by templates;
        example use with --template=reveal
        --VoilaConfiguration.resources="{'reveal': {'transition': 'fade', 'scroll': True}}"
        """)
    theme = Unicode('light', config=True)
    strip_sources = Bool(True, config=True,
                         help='Strip sources from rendered html')
    enable_nbextensions = Bool(
        False,
        config=True,
        help=('Set to True for Voilà to load notebook extensions'))

    file_whitelist = List(
        Unicode(),
        [r'.*\.(png|jpg|gif|svg)'],
        config=True,
        help=r"""
    List of regular expressions for controlling which static files are served.
    All files that are served should at least match 1 whitelist rule, and no blacklist rule
    Example: --VoilaConfiguration.file_whitelist="['.*\.(png|jpg|gif|svg)', 'public.*']"
    """,
    )

    file_blacklist = List(
        Unicode(),
        [r'.*\.(ipynb|py)'],
        config=True,
        help=r"""
    List of regular expressions for controlling which static files are forbidden to be served.
    All files that are served should at least match 1 whitelist rule, and no blacklist rule
    Example:
    --VoilaConfiguration.file_whitelist="['.*']" # all files
    --VoilaConfiguration.file_blacklist="['private.*', '.*\.(ipynb)']" # except files in the private dir and notebook files
    """)

    # The example names this option; it previously showed
    # `extension_language_mapping` by mistake.
    language_kernel_mapping = Dict(
        {},
        config=True,
        help="""Mapping of language name to kernel name
        Example mapping python to use xeus-python, and C++11 to use xeus-cling:
        --VoilaConfiguration.language_kernel_mapping='{"python": "xpython", "C++11": "xcpp11"}'
        """,
    )

    extension_language_mapping = Dict(
        {},
        config=True,
        help='''Mapping of file extension to kernel language
        Example mapping .py files to a python language kernel, and .cpp to a C++11 language kernel:
        --VoilaConfiguration.extension_language_mapping='{".py": "python", ".cpp": "C++11"}'
        ''',
    )

    http_keep_alive_timeout = Int(10, config=True, help="""
    When a cell takes a long time to execute, the http connection can timeout (possibly because of a proxy).
    Voila sends a 'heartbeat' message after the timeout is passed to keep the http connection alive.
    """)

    show_tracebacks = Bool(
        False,
        config=True,
        help=('Whether to send tracebacks to clients on exceptions.'))

    multi_kernel_manager_class = Type(
        config=True,
        default_value='jupyter_server.services.kernels.kernelmanager.AsyncMappingKernelManager',
        # default_value='voila.voila_kernel_manager.VoilaKernelManager',
        klass='jupyter_client.multikernelmanager.MultiKernelManager',
        help="""The kernel manager class. This is useful to specify a different kernel manager,
        for example a kernel manager with support for pooling.
        """)

    http_header_envs = List(
        Unicode(),
        [],
        help=r"""
    List of HTTP Headers that should be passed as env vars to the kernel.
    Example: --VoilaConfiguration.http_header_envs="['X-CDSDASHBOARDS-JH-USER']"
    """,
    ).tag(config=True)

    preheat_kernel = Bool(
        False,
        config=True,
        help="""Flag to enable or disable pre-heat kernel option.
        """)

    default_pool_size = Int(
        1,
        config=True,
        help="""Size of pre-heated kernel pool for each notebook. Zero or negative number means disabled.
        """)
class DisplayIntegrator(Tool):
    """Calibrate one event up to dl1 and hand it to ``IntegratorPlotter``.

    ``setup`` builds the reader, the charge extractor and the r1/dl0/dl1
    calibration stages from the tool configuration; ``start`` fetches the
    selected event, calibrates it and plots the chosen telescope/channel.
    """

    name = "DisplayIntegrator"
    description = "Calibrate dl0 data to dl1, and plot the various camera " \
                  "images that characterise the event and calibration. Also " \
                  "plot some examples of waveforms with the " \
                  "integration window."

    event_index = Int(0, help='Event index to view.').tag(config=True)
    # The string pieces need explicit separating spaces: adjacent literals
    # are concatenated without any.
    use_event_id = Bool(False,
                        help='event_index will obtain an event using '
                             'event_id instead of '
                             'index.').tag(config=True)
    telescope = Int(None, allow_none=True,
                    help='Telescope to view. Set to None to display the '
                         'first telescope with data.').tag(config=True)
    channel = Enum([0, 1], 0, help='Channel to view').tag(config=True)

    aliases = Dict(
        dict(r='EventFileReaderFactory.reader',
             f='EventFileReaderFactory.input_path',
             max_events='EventFileReaderFactory.max_events',
             extractor='ChargeExtractorFactory.extractor',
             window_width='ChargeExtractorFactory.window_width',
             window_shift='ChargeExtractorFactory.window_shift',
             sig_amp_cut_HG='ChargeExtractorFactory.sig_amp_cut_HG',
             sig_amp_cut_LG='ChargeExtractorFactory.sig_amp_cut_LG',
             lwt='ChargeExtractorFactory.lwt',
             clip_amplitude='CameraDL1Calibrator.clip_amplitude',
             radius='CameraDL1Calibrator.radius',
             E='DisplayIntegrator.event_index',
             T='DisplayIntegrator.telescope',
             C='DisplayIntegrator.channel',
             O='IntegratorPlotter.output_dir'))
    # The flag must target this tool's own `use_event_id` trait; it
    # previously set `DisplayDL1Calib.use_event_index`, which this tool
    # never reads, so `--id` had no effect.
    flags = Dict(
        dict(id=({'DisplayIntegrator': {'use_event_id': True}},
                 'event_index will obtain an event using '
                 'event_id instead of index.')))
    # CameraR1CalibratorFactory is configured in `setup`, so it is listed
    # here to expose its options as well.
    classes = List([
        EventFileReaderFactory,
        ChargeExtractorFactory,
        CameraR1CalibratorFactory,
        CameraDL1Calibrator,
        IntegratorPlotter,
    ])

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.file_reader = None
        self.r1 = None
        self.dl0 = None
        self.extractor = None
        self.dl1 = None
        self.plotter = None

    def setup(self):
        """Build reader, extractor, calibrators and plotter from config."""
        self.log_format = "%(levelname)s: %(message)s [%(name)s.%(funcName)s]"
        kwargs = dict(config=self.config, tool=self)

        reader_factory = EventFileReaderFactory(**kwargs)
        reader_class = reader_factory.get_class()
        self.file_reader = reader_class(**kwargs)

        extractor_factory = ChargeExtractorFactory(**kwargs)
        extractor_class = extractor_factory.get_class()
        self.extractor = extractor_class(**kwargs)

        # The r1 calibrator is chosen from the origin of the input file.
        r1_factory = CameraR1CalibratorFactory(origin=self.file_reader.origin,
                                               **kwargs)
        r1_class = r1_factory.get_class()
        self.r1 = r1_class(**kwargs)

        self.dl0 = CameraDL0Reducer(**kwargs)

        self.dl1 = CameraDL1Calibrator(extractor=self.extractor, **kwargs)

        self.plotter = IntegratorPlotter(**kwargs)

    def start(self):
        """Fetch the selected event, calibrate it and plot one telescope.

        When ``telescope`` is None the first telescope with data is used.
        The tool logs an error and exits when the event has no telescope
        data or the requested telescope is not part of the event.
        """
        event = self.file_reader.get_event(self.event_index,
                                           self.use_event_id)

        # Calibrate
        self.r1.calibrate(event)
        self.dl0.reduce(event)
        self.dl1.calibrate(event)

        # Select telescope
        tels = list(event.r0.tels_with_data)
        if not tels:
            # Avoid an IndexError on `tels[0]` below.
            self.log.error("[event] no telescope has data for this event")
            exit()
        telid = self.telescope
        if telid is None:
            telid = tels[0]
        if telid not in tels:
            self.log.error("[event] please specify one of the following "
                           "telescopes for this event: {}".format(tels))
            exit()

        extractor_name = self.extractor.name

        self.plotter.plot(self.file_reader, event, telid, self.channel,
                          extractor_name)

    def finish(self):
        pass
class Evaluate(App):
    """Evaluate a candidate selector (plus optional ranker) on a corpus.

    For every candidate count, ``eval_text_model`` is run on the chosen
    split.  All results are printed as JSON, followed by the candidate
    count with the highest ``<metric>_1`` score and that score.
    """

    dataset_type = Enum(('dblp', 'pubmed', 'oc'), default_value='pubmed')
    candidate_selector_type = Enum(('bm25', 'ann', 'oracle'),
                                   default_value='bm25')
    metric = Enum(('precision', 'recall', 'f1'), default_value='recall')
    split = Enum(('train', 'test', 'valid'), default_value='valid')

    # ann options
    paper_embedder_dir = Unicode(default_value=None, allow_none=True)

    # Candidate selector options
    num_candidates = Int(default_value=None, allow_none=True)

    ranker_type = Enum(('none', 'neural'), default_value='none')
    n_eval = Int(default_value=None, allow_none=True)

    # ranker options
    citation_ranker_dir = Unicode(default_value=None, allow_none=True)

    # Lazily created and then reused by `embedder` / `ann`.
    _embedder = None
    _ann = None

    def embedder(self, featurizer, embedding_model) -> EmbeddingModel:
        """Return the cached embedding model, creating it on first use."""
        if self._embedder is None:
            self._embedder = EmbeddingModel(featurizer, embedding_model)
        return self._embedder

    def ann(self, embedder, corpus) -> ANN:
        """Return the ANN index, loading or building it on first use.

        For an 'oc' corpus with an index pickle on disk the index is
        loaded; otherwise it is built from *embedder* and *corpus* and,
        for the 'oc' dataset, saved.  Later calls reuse the cached index
        instead of loading it from disk again.
        """
        if self._ann is not None:
            return self._ann

        if corpus.corpus_type == 'oc' and os.path.exists(
                DatasetPaths.OC_ANN_FILE + ".pickle"):
            self._ann = ANN.load(DatasetPaths.OC_ANN_FILE)
        else:
            self._ann = ANN.build(embedder, corpus, ann_trees=100)
            if self.dataset_type == 'oc':
                self._ann.save(DatasetPaths.OC_ANN_FILE)
        return self._ann

    def _make_ann_candidate_selector(self, corpus, featurizer,
                                     embedding_model, num_candidates):
        """Build an ANN candidate selector that returns *num_candidates*."""
        e = self.embedder(featurizer, embedding_model)
        return ANNCandidateSelector(corpus=corpus,
                                    ann=self.ann(e, corpus),
                                    paper_embedding_model=e,
                                    top_k=num_candidates,
                                    extend_candidate_citations=True)

    def main(self, args):
        """Run the evaluation and print the results (*args* is unused)."""
        dp = DatasetPaths()
        if self.dataset_type == 'oc':
            corpus = Corpus.load_pkl(dp.get_pkl_path(self.dataset_type))
        else:
            corpus = Corpus.load(dp.get_db_path(self.dataset_type))

        if self.ranker_type == 'none':
            citation_ranker = NoneRanker()
        elif self.ranker_type == 'neural':
            assert self.citation_ranker_dir is not None, \
                "citation_ranker_dir is required for the neural ranker"
            ranker_featurizer, ranker_models = model_from_directory(
                self.citation_ranker_dir, on_cpu=True)
            citation_ranker = Ranker(
                corpus=corpus,
                featurizer=ranker_featurizer,
                citation_ranker=ranker_models['citeomatic'],
                num_candidates_to_rank=100)
        else:
            assert False, "unknown ranker_type %r" % (self.ranker_type,)

        if self.num_candidates is None:
            if self.dataset_type == 'oc':
                num_candidates_list = [100]
            else:
                num_candidates_list = [1, 5, 10, 15, 25, 50, 75, 100]
        else:
            num_candidates_list = [self.num_candidates]

        # Load the paper embedder once, before the loop: only the first
        # load was ever used because the embedder is cached.
        paper_featurizer = paper_models = None
        if self.candidate_selector_type == 'ann':
            assert self.paper_embedder_dir is not None, \
                "paper_embedder_dir is required for the ann selector"
            paper_featurizer, paper_models = model_from_directory(
                self.paper_embedder_dir, on_cpu=True)

        candidate_results_map = {}
        for num_candidates in num_candidates_list:
            if self.candidate_selector_type == 'bm25':
                index_path = dp.get_bm25_index_path(self.dataset_type)
                candidate_selector = BM25CandidateSelector(
                    corpus, index_path, num_candidates, False)
            elif self.candidate_selector_type == 'ann':
                candidate_selector = self._make_ann_candidate_selector(
                    corpus=corpus,
                    featurizer=paper_featurizer,
                    embedding_model=paper_models['embedding'],
                    num_candidates=num_candidates)
            elif self.candidate_selector_type == 'oracle':
                candidate_selector = OracleCandidateSelector(corpus)
            else:
                assert False, "unknown candidate_selector_type %r" % (
                    self.candidate_selector_type,)

            results = eval_text_model(corpus,
                                      candidate_selector,
                                      citation_ranker,
                                      papers_source=self.split,
                                      n_eval=self.n_eval)
            candidate_results_map[num_candidates] = results

        # Pick the candidate count with the strictly highest score; when
        # no score exceeds 0.0 the result stays at (-1, 0.0).
        best_k = -1
        best_metric = 0.0
        metric_key = self.metric + "_1"
        dataset_key = EVAL_DATASET_KEYS[self.dataset_type]
        for k, v in candidate_results_map.items():
            score = v[metric_key][dataset_key]
            if best_metric < score:
                best_k = k
                best_metric = score

        print(json.dumps(candidate_results_map, indent=4, sort_keys=True))
        print(best_k)
        print(best_metric)
class ResourceUseDisplay(Configurable):
    """
    Holds server-side configuration for nbresuse

    The limit traits default to the matching environment variable
    (``MEM_LIMIT``, ``CPU_LIMIT``, ``DISK_LIMIT``, ``DISK_DIR``); an unset
    or empty variable falls back to 0, or to the current working directory
    for ``DISK_DIR``.
    """

    process_cpu_metrics = List(
        trait=PSUtilMetric(),
        default_value=[{
            "name": "cpu_percent",
            "kwargs": {
                "interval": 0.05
            }
        }],
    )

    system_cpu_metrics = List(trait=PSUtilMetric(),
                              default_value=[{
                                  "name": "cpu_count"
                              }])

    mem_limit = Union(
        trait_types=[Int(), Callable()],
        help="""
        Memory limit to display to the user, in bytes.
        Can also be a function which calculates the memory limit.

        Note that this does not actually limit the user's memory usage!

        Defaults to reading from the `MEM_LIMIT` environment variable. If
        set to 0, the max memory available is displayed.
        """,
    ).tag(config=True)

    @default("mem_limit")
    def _mem_limit_default(self):
        # `or 0` also covers a variable that is set but empty, which
        # would otherwise make int() raise.
        return int(os.environ.get("MEM_LIMIT") or 0)

    track_cpu_percent = Bool(
        default_value=True,
        help="""
        Set to True in order to enable reporting of CPU usage statistics.
        """,
    ).tag(config=True)

    cpu_limit = Union(
        trait_types=[Float(), Callable()],
        default_value=0,
        help="""
        CPU usage limit to display to the user.

        Note that this does not actually limit the user's CPU usage!

        Defaults to reading from the `CPU_LIMIT` environment variable. If
        set to 0, the total CPU count available is displayed.
        """,
    ).tag(config=True)

    @default("cpu_limit")
    def _cpu_limit_default(self):
        return float(os.environ.get("CPU_LIMIT") or 0)

    track_disk_usage = Bool(
        default_value=True,
        help="""
        Set to True in order to enable reporting of Disk usage statistics.
        """,
    ).tag(config=True)

    disk_limit = Union(
        trait_types=[Int(), Callable()],
        default_value=0,
        help="""
        Disk usage limit to display to the user.

        Note that this does not actually limit the user's Disk space!

        Defaults to reading from the `DISK_LIMIT` environment variable. If
        set to 0, the total partition space available is displayed.
        """,
    ).tag(config=True)

    @default("disk_limit")
    def _disk_limit_default(self):
        return int(os.environ.get("DISK_LIMIT") or 0)

    disk_dir = Union(
        trait_types=[Unicode(), Callable()],
        default_value=os.getcwd(),
        help="""
        The directory that is on the partition to get the size of.

        Defaults to reading from the `DISK_DIR` environment variable. If
        not defined, it defaults to the current working directory of the
        server process.
        """,
    ).tag(config=True)

    @default("disk_dir")
    def _disk_dir_default(self):
        return os.environ.get("DISK_DIR") or os.getcwd()
class KeplerGl(widgets.DOMWidget):
    """Widget holding map data and config, with export to standalone HTML."""

    _view_name = Unicode('KeplerGlView').tag(sync=True)
    _model_name = Unicode('KeplerGlModal').tag(sync=True)
    _view_module = Unicode('keplergl-jupyter').tag(sync=True)
    _model_module = Unicode('keplergl-jupyter').tag(sync=True)
    _view_module_version = Unicode(EXTENSION_SPEC_VERSION).tag(sync=True)
    _model_module_version = Unicode(EXTENSION_SPEC_VERSION).tag(sync=True)
    value = Unicode('Hello World!').tag(sync=True)

    data = Dict({}).tag(sync=True, **data_serialization)
    config = Dict({}).tag(sync=True)
    height = Int(400).tag(sync=True)

    def __init__(self, **kwargs):
        super(KeplerGl, self).__init__(**kwargs)
        print('User Guide: {}'.format(documentation))

    @validate('data')
    def _validate_data(self, proposal):
        '''Validate data input.

        Makes sure data is a dict, and each value should be either a df,
        a geojson dictionary / string or csv string layers list.

        Raises DataException when the proposed value or one of its entries
        has an unsupported type.
        '''
        if type(proposal.value) is not dict:
            raise DataException(
                '[data type error]: Expecting a dictionary mapping from id to value, but got {}'
                .format(type(proposal.value)))

        for key, value in proposal.value.items():
            # Exact str/dict type checks are kept so the accepted set of
            # values stays what it was.
            supported = (isinstance(value, pd.DataFrame)
                         or type(value) is str
                         or type(value) is dict)
            if not supported:
                raise DataException(
                    '[data type error]: value of {} should be a DataFrame, a Geojson Dictionary or String, a csv String, but got {}'
                    .format(key, type(value)))

        return proposal.value

    def add_data(self, data, name="unnamed"):
        ''' Add a dataset to the map under the given name

        Inputs:
        - data string, can be a csv string or json string
        - name string

        Example of use:
            keplergl.add_data(data_string, name="data_1")
        '''
        normalized = _normalize_data(data)
        # Assign a fresh dict rather than mutating `self.data` in place.
        updated = self.data.copy()
        updated.update({name: normalized})

        self.data = updated

    def save_to_html(self,
                     data=None,
                     config=None,
                     file_name='keplergl_map.html',
                     read_only=False):
        ''' Save current map to an interactive html

        Inputs:
        - data: a data dictionary {"name": data}, if not provided, will use current map data
        - config: map config dictionary, if not provided, will use current map config
        - file_name: the html file name, default is keplergl_map.html
        - read_only: if read_only is True, hide side panel to disable map customization

        Returns:
        - an html file will be saved to your notebook

        Raises:
        - ValueError if the bundled html template has no <body> tag

        Example of use:
            # this will save map with provided data and config
            keplergl.save_to_html(data={"data_1": df}, config=config, file_name='first_map.html')

            # this will save current map
            keplergl.save_to_html(file_name='first_map.html')
        '''
        keplergl_html = resource_string(__name__,
                                        'static/keplergl.html').decode('utf-8')

        # Split at the first <body>; an empty marker means it is missing,
        # in which case slicing by index would corrupt the output.
        head, body_tag, tail = keplergl_html.partition("<body>")
        if not body_tag:
            raise ValueError(
                "keplergl html template does not contain a <body> tag")

        data_to_add = data_to_json(self.data if data is None else data, None)
        config_to_add = self.config if config is None else config

        keplergl_data = json.dumps({
            "config": config_to_add,
            "data": data_to_add,
            "options": {
                "readOnly": read_only
            }
        })
        # "</" can only occur inside JSON strings; escaping the slash keeps
        # a "</script>" in the data from closing the injected script
        # element, and "\/" still decodes to "/".
        keplergl_data = keplergl_data.replace("</", "<\\/")

        cmd = """window.__keplerglDataConfig = {};""".format(keplergl_data)
        frame_txt = head + "<body><script>" + cmd + "</script>" + tail

        with open(file_name, 'wb') as f:
            f.write(frame_txt.encode('utf-8'))

        print("Map saved to {}!".format(file_name))
class DataGrid(DOMWidget): """A Grid Widget with filter, sort and selection capabilities. Attributes ---------- base_row_size : int (default: 20) Default row height base_column_size : int (default: 64) Default column width base_row_header_size : int (default: 64) Default row header width base_column_header_size : int (default: 20) Default column header height header_visibility : {'all', 'row', 'column', 'none'} (default: 'all') Header visibility mode 'all': both row and column headers visible 'row': only row headers visible 'column': only column headers visible 'none': neither row and column headers visible dataframe : pandas dataframe Data to display on Data Grid. renderers : dict Custom renderers to use for cell rendering. Keys of dictionary specify column name, and value specifies the renderer default_renderer : CellRenderer (default: TextRenderer) Default renderer to use for cell rendering header_renderer : CellRenderer (default: TextRenderer) Renderer to use for header cell rendering corner_renderer : CellRenderer (default: TextRenderer) Renderer to use for corner header cell rendering selection_mode : {'row', 'column', 'cell', 'none'} (default: 'none') Selection mode used when user clicks on grid or makes selections programmatically. 'row': Selecting a cell will select all the cells on the same row 'column': Selecting a cell will select all the cells on the same column 'cell': Individual cell selection 'none': Selection disabled selections : list of dict List of all selections. Selections are represented as rectangular regions. Rectangles are defined as dictionaries with keys: 'r1': start row, 'c1': start column, 'r2': end row, 'c2': end column. 
Start of rectangle is top-left corner and end is bottom-right corner editable : boolean (default: false) Boolean indicating whether cell grid can be directly edited column_widths : Dict of strings to int (default: {}) Dict to specify custom column sizes The keys (strings) indicate the names of the columns The values (integers) indicate the widths auto_fit_columns : Bool (default: True) Specify whether column width should automatically be determined by the grid auto_fit_params : Dict. Specify column auto fit parameters. Supported parameters: 1) area: where to resize column widths - 'row-header', 'body' or 'all' (default) 2) padding: add padding to resized column widths (15 pixels by default) 3) numCols: cap the number of columns to be resized (None) grid_style : Dict of {propertyName: string | VegaExpr | Dict} Dict to specify global grid styles. The keys (strings) indicate the styling property The values (css color properties or Vega Expression) indicate the values See below for all supported styling properties index_name : str (default: "key") String to specify the index column name. **Only set when the grid is constructed and is not an observable traitlet** Accessors (not observable traitlets) --------- selected_cells : list of dict List of selected cells. Each cell is represented as a dictionary with keys 'r': row and 'c': column selected_cell_values : list List of values for all selected cells. selected_cell_iterator : iterator An iterator to traverse selected cells one by one. 
Supported styling properties: void_color : color of the area where the grid is not painted on the canvas background_color : background color for all body cells row_background_color : row-wise background color (can take a string or Vega Expression) column_background_color : column-wise background color (can take a string of Vega Expression) grid_line_color : color of both vertical and horizontal grid lines vertical_grid_line_color : vertical grid line color horizontal_grid_line_color : horizontal grid line color header_background_color : background color for all non-body cells (index and columns) header_grid_line_color : grid line color for all non-body cells (index and columns) header_vertical_grid_line_color : vertical grid line color for all non-body cells header_horizontal_grid_line_color : horizontal grid line color for all non-body cells selection_fill_color : fill color of selected area selection_border_color : border color of selected area header_selection_fill_color : fill color of headers intersecting with selected area at column or row header_selection_border_color : border color of headers intersecting with selected area at column or row cursor_fill_color : fill color of cursor cursor_border_color : border color of cursor scroll_shadow : Dict of color parameters for scroll shadow (vertical and horizontal). 
Takes three paramaters: size : size of shadow in pixels color1 : gradient color 1 color2 : gradient color 2 color3 : gradient color 3 """ _model_name = Unicode("DataGridModel").tag(sync=True) _model_module = Unicode(module_name).tag(sync=True) _model_module_version = Unicode(module_version).tag(sync=True) _view_name = Unicode("DataGridView").tag(sync=True) _view_module = Unicode(module_name).tag(sync=True) _view_module_version = Unicode(module_version).tag(sync=True) base_row_size = Int(20).tag(sync=True) base_column_size = Int(64).tag(sync=True) base_row_header_size = Int(64).tag(sync=True) base_column_header_size = Int(20).tag(sync=True) header_visibility = Enum(default_value="all", values=["all", "row", "column", "none"]).tag(sync=True) _transforms = List(Dict()).tag(sync=True, **widget_serialization) _visible_rows = List(Int()).tag(sync=True) _data = Dict().tag(sync=True, **_data_serialization) renderers = Dict(Instance(CellRenderer)).tag(sync=True, **widget_serialization) default_renderer = Instance(CellRenderer).tag(sync=True, **widget_serialization) header_renderer = Instance(CellRenderer, allow_none=True).tag(sync=True, **widget_serialization) corner_renderer = Instance(CellRenderer, allow_none=True).tag(sync=True, **widget_serialization) selection_mode = Enum(default_value="none", values=["row", "column", "cell", "none"]).tag(sync=True) selections = List(Dict()).tag(sync=True, **widget_serialization) editable = Bool(False).tag(sync=True) column_widths = Dict({}).tag(sync=True) grid_style = Dict(allow_none=True).tag(sync=True, **widget_serialization) auto_fit_columns = Bool(False).tag(sync=True) auto_fit_params = Dict({ "area": "all", "padding": 30, "numCols": None }, allow_none=False).tag(sync=True, **widget_serialization) def __init__(self, dataframe, **kwargs): # Setting default index name if not explicitly # set by the user. 
if "index_name" in kwargs: self._index_name = kwargs["index_name"] else: self._index_name = None self.data = dataframe super().__init__(**kwargs) self._cell_click_handlers = CallbackDispatcher() self._cell_change_handlers = CallbackDispatcher() self.on_msg(self.__handle_custom_msg) def __handle_custom_msg(self, _, content, buffers): # noqa: U101,U100 if content["event_type"] == "cell-changed": row = content["row"] column = self._column_index_to_name(self._data, content["column_index"]) value = content["value"] # update data on kernel self._data["data"][row][column] = value # notify python listeners self._cell_change_handlers({ "row": row, "column": column, "column_index": content["column_index"], "value": value, }) elif content["event_type"] == "cell-click": # notify python listeners self._cell_click_handlers({ "region": content["region"], "column": content["column"], "column_index": content["column_index"], "row": content["row"], "primary_key_row": content["primary_key_row"], "cell_value": content["cell_value"], }) @property def data(self): trimmed_primary_key = self._data["schema"]["primaryKey"][:-1] df = pd.DataFrame(self._data["data"]) final_df = df.set_index(trimmed_primary_key) final_df = final_df[final_df.columns[:-1]] return final_df @staticmethod def generate_data_object(dataframe, guid_key="ipydguuid", index_name="key"): dataframe[guid_key] = pd.RangeIndex(0, dataframe.shape[0]) # Renaming default index name from 'index' to 'key' on # single index DataFrames. This allows users to use # 'index' as a column name. 
If 'key' exists, we add _x # suffix to id, where { x | 0 <= x < inf } if not isinstance(dataframe.index, pd.MultiIndex): if index_name in dataframe.columns: index = 0 new_index_name = f"{index_name}_{index}" while new_index_name in dataframe.columns: index += 1 new_index_name = f"{index_name}_{index}" dataframe = dataframe.rename_axis(new_index_name) else: dataframe = dataframe.rename_axis(index_name) schema = pd.io.json.build_table_schema(dataframe) reset_index_dataframe = dataframe.reset_index() data = reset_index_dataframe.to_dict(orient="records") # Check for multiple primary keys key = reset_index_dataframe.columns[:dataframe.index.nlevels].tolist() num_index_levels = len(key) if isinstance(key, list) else 1 # Check for nested columns in schema, if so, we need to update the # schema to represent the actual column name values if isinstance(schema["fields"][-1]["name"], tuple): num_column_levels = len(dataframe.columns.levels) primary_key = key.copy() for i in range(num_index_levels): new_name = [""] * num_column_levels new_name[0] = schema["fields"][i]["name"] schema["fields"][i]["name"] = tuple(new_name) primary_key[i] = tuple(new_name) schema["primaryKey"] = primary_key uuid_pk = list(key[-1]) uuid_pk[0] = guid_key schema["primaryKey"].append(tuple(uuid_pk)) else: schema["primaryKey"] = key schema["primaryKey"].append(guid_key) schema["primaryKeyUuid"] = guid_key return { "data": data, "schema": schema, "fields": [{ field["name"]: None } for field in schema["fields"]], } @data.setter def data(self, dataframe): # Reference for the original frame column and index names # This is used to when returning the view data model self.__dataframe_reference_index_names = dataframe.index.names self.__dataframe_reference_columns = dataframe.columns dataframe = dataframe.copy() # Primary key used index_key = self.get_dataframe_index(dataframe) self._data = self.generate_data_object(dataframe, "ipydguuid", index_key) def get_dataframe_index(self, dataframe): """Returns a 
primary key to be used in ipydatagrid's view of the passed DataFrame""" # Passed index_name takes highest priority if self._index_name is not None: return self._index_name # Dataframe with names index used by default if dataframe.index.name is not None: return dataframe.index.name # If no index_name param, nor named-index DataFrame # have been passed, revert to default "key" return "key" def get_cell_value(self, column_name, primary_key_value): """Gets the value for a single or multiple cells by column name and index name. Tuples should be used to index into multi-index columns.""" row_indices = self._get_row_index_of_primary_key(primary_key_value) return [self._data["data"][row][column_name] for row in row_indices] def set_cell_value(self, column_name, primary_key_value, new_value): """Sets the value for a single cell by column name and primary key. Note: This method returns a boolean to indicate if the operation was successful. """ row_indices = self._get_row_index_of_primary_key(primary_key_value) # Bail early if key could not be found if not row_indices: return False # Iterate over all indices outcome = True for row_index in row_indices: has_column = column_name in self._data["data"][row_index] if has_column and row_index is not None: self._data["data"][row_index][column_name] = new_value self._notify_cell_change(row_index, column_name, new_value) else: outcome = False return outcome def get_cell_value_by_index(self, column_name, row_index): """Gets the value for a single cell by column name and row index.""" return self._data["data"][row_index][column_name] def set_cell_value_by_index(self, column_name, row_index, new_value): """Sets the value for a single cell by column name and row index. Note: This method returns a boolean to indicate if the operation was successful. 
""" has_column = column_name in self._data["data"][row_index] if has_column and 0 <= row_index < len(self._data["data"]): self._data["data"][row_index][column_name] = new_value self._notify_cell_change(row_index, column_name, new_value) return True return False def _notify_cell_change(self, row, column, value): column_index = self._column_name_to_index(column) # notify python listeners self._cell_change_handlers({ "row": row, "column": column, "column_index": column_index, "value": value, }) # notify front-end self.comm.send( data={ "method": "custom", "content": { "event_type": "cell-changed", "row": row, "column": column, "column_index": column_index, "value": value, }, }) def get_visible_data(self): """Returns a dataframe of the current View.""" data = deepcopy(self._data) if self._visible_rows: data["data"] = [data["data"][i] for i in self._visible_rows] at = self._data["schema"]["primaryKey"] return_df = pd.DataFrame(data["data"]).set_index(at) return_df.index = return_df.index.droplevel(return_df.index.nlevels - 1) return_df.index.names = self.__dataframe_reference_index_names return_df.columns = self.__dataframe_reference_columns return return_df def transform(self, transforms): """Apply a list of transformation to this DataGrid.""" # TODO: Validate this input, or let it fail on view side? self._transforms = transforms def revert(self): """Revert all transformations.""" self._transforms = [] @default("default_renderer") def _default_renderer(self): return TextRenderer() def clear_selection(self): """Clears all selections.""" self.selections.clear() self.send_state("selections") def select(self, row1, column1, row2=None, column2=None, clear_mode="none"): """ Select an individual cell or rectangular cell region. Parameters ---------- row1 : int Row index for individual cell selection or start row index for rectangular region selection. column1 : int Column index for individual cell selection or start column index for rectangular region selection. 
row2 : int or None, optional (default: None) End row index for rectangular region selection. column2 : int or None, optional (default: None) End column index for rectangular region selection. clear_mode : string, optional, {'all', 'current', 'none'} (default: 'none') Clear mode to use when there are pre-existing selections. 'all' removes all pre-existing selections 'current' removes last pre-existing selection 'none' keeps pre-existing selections """ if row2 is None or column2 is None: row2, column2 = row1, column1 if clear_mode == "all": self.selections.clear() elif clear_mode == "current" and len(self.selections) > 0: self.selections.pop() self.selections.append({ "r1": min(row1, row2), "c1": min(column1, column2), "r2": max(row1, row2), "c2": max(column1, column2), }) self.send_state("selections") @property def selected_cells(self): """ List of selected cells. Each cell is represented as a dictionary with keys 'r': row and 'c': column """ return SelectionHelper(self._data, self.selections, self.selection_mode).all() @property def selected_cell_values(self): """ List of values for all selected cells. """ # Copy of the front-end data model view_data = self.get_visible_data() # Get primary key from dataframe index_key = self.get_dataframe_index(view_data) # Serielize to JSON table schema view_data_object = self.generate_data_object(view_data, "ipydguuid", index_key) return SelectionHelper(view_data_object, self.selections, self.selection_mode).all_values() @property def selected_cell_iterator(self): """ An iterator to traverse selected cells one by one. 
""" return SelectionHelper(self._data, self.selections, self.selection_mode) @validate("selections") def _validate_selections(self, proposal): selections = proposal["value"] for rectangle in selections: r1 = min(rectangle["r1"], rectangle["r2"]) c1 = min(rectangle["c1"], rectangle["c2"]) r2 = max(rectangle["r1"], rectangle["r2"]) c2 = max(rectangle["c1"], rectangle["c2"]) rectangle["r1"] = r1 rectangle["c1"] = c1 rectangle["r2"] = r2 rectangle["c2"] = c2 return selections @validate("editable") def _validate_editable(self, proposal): value = proposal["value"] if value and self.selection_mode == "none": self.selection_mode = "cell" return value @validate("_transforms") def _validate_transforms(self, proposal): transforms = proposal["value"] field_len = len(self._data["schema"]["fields"]) for transform in transforms: if transform["columnIndex"] > field_len: raise ValueError("Column index is out of bounds.") return transforms @validate("_data") def _validate_data(self, proposal): table_schema = proposal["value"] column_list = [f["name"] for f in table_schema["schema"]["fields"]] if len(column_list) != len(set(column_list)): msg = "The dataframe must not contain duplicate column names." raise ValueError(msg) return table_schema def on_cell_change(self, callback, remove=False): """Register a callback to execute when a cell value changed. The callback will be called with one argument, the dictionary containing cell information with keys "row", "column", "column_index", "value". Parameters ---------- remove: bool (optional) Set to true to remove the callback from the list of callbacks. """ self._cell_change_handlers.register_callback(callback, remove=remove) def on_cell_click(self, callback, remove=False): """Register a callback to execute when a cell is clicked. 
The callback will be called with one argument, the dictionary containing cell information with following keys: "region", "column", "column_index", "row", "primary_key_row", "cell_value" Parameters ---------- remove: bool (optional) Set to true to remove the callback from the list of callbacks. """ self._cell_click_handlers.register_callback(callback, remove=remove) @staticmethod def _column_index_to_name(data, column_index): if "schema" not in data or "fields" not in data["schema"]: return None col_headers = DataGrid._get_col_headers(data) return (None if len(col_headers) <= column_index else col_headers[column_index]) @staticmethod def _get_col_headers(data): primary_keys = ([] if "primaryKey" not in data["schema"] else data["schema"]["primaryKey"]) col_headers = [ field["name"] for field in data["schema"]["fields"] if field["name"] not in primary_keys ] return col_headers def _column_name_to_index(self, column_name): if "schema" not in self._data or "fields" not in self._data["schema"]: return None col_headers = self._get_col_headers(self._data) try: return col_headers.index(column_name) except ValueError: pass def _get_row_index_of_primary_key(self, value): value = value if isinstance(value, list) else [value] schema = self._data["schema"] key = schema["primaryKey"][:-1] # Omitting ipydguuid if len(value) != len(key): raise ValueError( "The provided primary key value must be the same length " "as the primary key.") row_indices = [ at for at, row in enumerate(self._data["data"]) if all(row[key[j]] == value[j] for j in range(len(key))) ] return row_indices @staticmethod def _get_cell_value_by_numerical_index(data, column_index, row_index): """Gets the value for a single cell by column index and row index.""" column = DataGrid._column_index_to_name(data, column_index) if column is None: return None return data["data"][row_index][column]
class LDAPAuthenticator(Authenticator):
    """
    LDAP Authenticator for Jupyterhub

    Authentication flow implemented by :meth:`authenticate`:

    1. normalize and validate the supplied username / password,
    2. build an ldap3 server pool from ``server_hosts``,
    3. bind with the service account (``bind_user_dn``),
    4. search for the authenticating user and read its group memberships,
    5. when the user belongs to a permitted group (or ``filter_by_group`` is
       disabled), rebind with the user's own DN and password.
    """

    server_hosts = Union([List(), Unicode()], config=True, help="""
        List of Names, IPs, or the complete URLs in the
        scheme://hostname:hostport format of the server (required).
        """)

    server_port = Int(allow_none=True, default_value=None, config=True, help="""
        The port where the LDAP server is listening. Typically 389, for a
        cleartext connection, and 636 for a secured connection (defaults to None).
        """)

    server_use_ssl = Bool(default_value=False, config=True, help="""
        Boolean specifying if the connection is on a secure port (defaults to False).
        """)

    server_connect_timeout = Int(allow_none=True, default_value=None, config=True, help="""
        Timeout in seconds permitted when establishing an ldap connection before
        raising an exception (defaults to None).
        """)

    server_receive_timeout = Int(allow_none=True, default_value=None, config=True, help="""
        Timeout in seconds permitted for responses from established ldap
        connections before raising an exception (defaults to None).
        """)

    server_pool_strategy = Unicode(default_value='FIRST', config=True, help="""
        Available Pool HA strategies (defaults to 'FIRST').
        FIRST: Gets the first server in the pool, if 'server_pool_active' is
        set to True gets the first available server.
        ROUND_ROBIN: Each time the connection is open the subsequent server in
        the pool is used. If 'server_pool_active' is set to True unavailable
        servers will be discarded.
        RANDOM: each time the connection is open a random server is chosen in the
        pool. If 'server_pool_active' is set to True unavailable servers
        will be discarded.
        """)

    server_pool_active = Union([Bool(), Int()], default_value=True, config=True, help="""
        If True the ServerPool strategy will check for server availability. Set
        to Integer for maximum number of cycles to try before giving up
        (defaults to True).
        """)

    server_pool_exhaust = Union([Bool(), Int()], default_value=False, config=True, help="""
        If True, any inactive servers will be removed from the pool. If set to an
        Integer, this will be the number of seconds an unreachable server is
        considered offline. When this timeout expires the server is reinserted
        in the pool and checked again for availability (defaults to False).
        """)

    bind_user_dn = Unicode(allow_none=True, default_value=None, config=True, help="""
        The account of the user to log in for simple bind (defaults to None).
        """)

    bind_user_password = Unicode(allow_none=True, default_value=None, config=True, help="""
        The password of the user for simple bind (defaults to None)
        """)

    user_search_base = Unicode(config=True, help="""
        The location in the Directory Information Tree where the user search
        will start.
        """)

    user_search_filter = Unicode(config=True, help="""
        LDAP search filter to validate that the authenticating user exists
        within the organization. Search filters containing '{username}' will
        have that value substituted with the username of the authenticating user.
        """)

    filter_by_group = Bool(default_value=True, config=True, help="""
        Boolean specifying if the group membership filtering is enabled or not.
        """)

    user_membership_attribute = Unicode(default_value='memberOf', config=True, help="""
        LDAP Attribute used to associate user group membership
        (defaults to 'memberOf').
        """)

    group_search_base = Unicode(config=True, help="""
        The location in the Directory Information Tree where the group search
        will start. Search string containing '{group}' will be substituted
        with entries taken from allow_nested_groups.
        """)

    group_search_filter = Unicode(config=True, help="""
        LDAP search filter to return members of groups defined in the
        allowed_groups parameter. Search filters containing '{group}' will
        have that value substituted with the group dns provided in the
        allowed_groups parameter.
        """)

    allowed_groups = Union([Unicode(), List()], config=True, help="""
        List of LDAP group DNs that users must be a member of in order to be
        granted login.
        """)

    allow_nested_groups = Bool(default_value=False, config=True, help="""
        Boolean allowing for recursive search of members within nested groups of
        allowed_groups (defaults to False).
        """)

    username_pattern = Unicode(config=True, help="""
        Regular expression pattern that all valid usernames must match. If a
        username does not match the pattern specified here, authentication will
        not be attempted. If not set, allow any username (defaults to None).
        """)

    username_regex = Any(help="""
        Compiled regex kept in sync with `username_pattern`
        """)

    @observe('username_pattern')
    def _username_pattern_changed(self, change):
        """Recompile ``username_regex`` whenever ``username_pattern`` changes.

        An empty / unset pattern clears the regex, which makes
        :meth:`validate_username` accept any username.
        """
        pattern = change['new']
        # Only compile when a pattern is actually set; compiling ``None``
        # raises TypeError.
        self.username_regex = re.compile(pattern) if pattern else None

    create_user_home_dir = Bool(default_value=False, config=True, help="""
        If set to True, will attempt to create a user's home directory locally
        if that directory does not exist already.
        """)

    create_user_home_dir_cmd = Command(config=True, help="""
        Command to create a users home directory.
        """)

    @default('create_user_home_dir_cmd')
    def _default_create_user_home_dir_cmd(self):
        """Return the platform default command used to create a home directory."""
        if sys.platform == 'linux':
            home_dir_cmd = ['mkhomedir_helper']
        else:
            self.log.debug(
                "Not sure how to create a home directory on '%s' system",
                sys.platform)
            home_dir_cmd = ['']
        return home_dir_cmd

    @gen.coroutine
    def add_user(self, user):
        """Hook run when a user is added to the hub.

        Ensures the user's local home directory exists, creating it when
        ``create_user_home_dir`` is enabled.

        Raises
        ------
        KeyError
            If the home directory is missing and ``create_user_home_dir`` is
            disabled.
        """
        username = user.name
        user_exists = yield gen.maybe_future(
            self.user_home_dir_exists(username))
        if not user_exists:
            if self.create_user_home_dir:
                yield gen.maybe_future(self.add_user_home_dir(username))
            else:
                raise KeyError("Domain user '%s' does not exists locally." %
                               username)
        yield gen.maybe_future(super().add_user(user))

    def user_home_dir_exists(self, username):
        """
        Verify user home directory exists

        Looks the user up in the password database and returns True when the
        recorded home directory is an existing directory.
        """
        user = pwd.getpwnam(username)
        return os.path.isdir(user.pw_dir)

    def add_user_home_dir(self, username):
        """
        Creates user home directory

        Runs ``create_user_home_dir_cmd`` (every ``USERNAME`` occurrence is
        replaced by the username) with the username appended as last argument.

        Raises
        ------
        RuntimeError
            If the command exits with a non-zero return code.
        """
        cmd = [
            arg.replace('USERNAME', username)
            for arg in self.create_user_home_dir_cmd
        ] + [username]
        self.log.info("Creating '%s' user home directory using command '%s'",
                      username, ' '.join(map(pipes.quote, cmd)))
        create_dir = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        # communicate() drains the pipe while waiting; wait() alone can
        # deadlock once the child fills the stdout pipe buffer.
        output, _ = create_dir.communicate()
        if create_dir.returncode:
            err = output.decode('utf8', 'replace')
            raise RuntimeError("Failed to create system user %s: %s" %
                               (username, err))

    def normalize_username(self, username):
        """
        Normalize username for ldap query

        modifications:
         - format to lowercase
         - escape filter characters (ldap3)
        """
        username = username.lower()
        username = escape_filter_chars(username)
        return username

    def validate_username(self, username):
        """
        Validate a normalized username

        Return True if username is valid, False otherwise.
        """
        if '/' in username:
            # / is not allowed in usernames
            return False
        if not username:
            # empty usernames are not allowed
            return False
        if not self.username_regex:
            return True
        return bool(self.username_regex.match(username))

    def validate_host(self, host):
        """
        Validate hostname

        Return True if host is valid, False otherwise. Accepted formats are an
        IPv4 address, a dotted hostname, or an ``ldap(s)://hostname:port`` url.
        """
        host_ip_regex = re.compile(
            r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$'
        )
        host_name_regex = re.compile(
            r'^((?!-)[a-z0-9\-]{1,63}(?<!-)\.){1,}((?!-)[a-z0-9\-]{1,63}(?<!-)){1}$'
        )
        # NOTE(review): the url form only accepts exactly three port digits
        # (e.g. 389, 636) - confirm this restriction is intended.
        host_url_regex = re.compile(
            r'^(ldaps?://)(((?!-)[a-z0-9\-]{1,63}(?<!-)\.){1,}((?!-)[a-z0-9\-]{1,63}(?<!-)){1}):([0-9]{3})$'
        )
        return any(
            regex.match(host)
            for regex in (host_ip_regex, host_name_regex, host_url_regex))

    def create_ldap_server_pool_obj(self, ldap_servers=None):
        """
        Create ldap3 ServerPool Object
        """
        server_pool = ldap3.ServerPool(
            ldap_servers,
            pool_strategy=self.server_pool_strategy.upper(),
            active=self.server_pool_active,
            exhaust=self.server_pool_exhaust)
        return server_pool

    def create_ldap_server_obj(self, host):
        """
        Create ldap3 Server Object
        """
        server = ldap3.Server(host,
                              port=self.server_port,
                              use_ssl=self.server_use_ssl,
                              connect_timeout=self.server_connect_timeout)
        return server

    def ldap_connection(self, server_pool, username, password):
        """
        Create ldaps Connection Object

        Returns the ldap3 connection, or None when binding fails with an
        ``LDAPBindError`` (the failure is logged).
        """
        try:
            conn = ldap3.Connection(
                server_pool,
                user=username,
                password=password,
                auto_bind=ldap3.AUTO_BIND_TLS_BEFORE_BIND,
                read_only=True,
                receive_timeout=self.server_receive_timeout)
        except ldap3.core.exceptions.LDAPBindError as exc:
            msg = '\n{exc_type}: {exc_msg}'.format(
                exc_type=exc.__class__.__name__,
                exc_msg=exc.args[0] if exc.args else '')
            self.log.error("Failed to connect to ldap: %s", msg)
            return None
        return conn

    def get_nested_groups(self, conn, group, _seen=None):
        """
        Recursively search group for nested memberships

        Parameters
        ----------
        conn : ldap3 connection used to run the group searches.
        group : group DN substituted for '{group}' in ``group_search_filter``.
        _seen : internal set of DNs already visited; callers should not pass it.
            It prevents unbounded recursion when groups reference each other.

        Returns
        -------
        list
            De-duplicated DNs of every group found below ``group``.
        """
        seen = set() if _seen is None else _seen
        seen.add(group)
        conn.search(search_base=self.group_search_base,
                    search_filter=self.group_search_filter.format(group=group),
                    search_scope=ldap3.SUBTREE)
        nested_groups = []
        # Snapshot the response: the recursive searches below replace
        # ``conn.response``.
        for nested_group in list(conn.response or []):
            group_dn = nested_group.get('dn')
            if not group_dn or group_dn in seen:
                continue
            seen.add(group_dn)
            nested_groups.append(group_dn)
            nested_groups.extend(
                self.get_nested_groups(conn, group_dn, seen))
        return list(set(nested_groups))

    @gen.coroutine
    def authenticate(self, handler, data):
        """Authenticate a user against the configured LDAP servers.

        Parameters
        ----------
        handler : the request handler (unused here).
        data : dict with the 'username' and 'password' form values.

        Returns
        -------
        The normalized username on success, None on any failure (the reason is
        logged).
        """
        # define vars
        username = data['username']
        password = data['password']
        server_pool = self.create_ldap_server_pool_obj()
        conn_servers = list()

        # validate credentials
        username = self.normalize_username(username)
        if not self.validate_username(username):
            self.log.error('Unsupported username supplied')
            return None
        if password is None or password.strip() == '':
            self.log.error('Empty password supplied')
            return None

        # cast server_hosts to list
        if isinstance(self.server_hosts, str):
            self.server_hosts = self.server_hosts.split()

        # validate hosts and populate server_pool object
        for host in self.server_hosts:
            host = host.strip().lower()
            if not self.validate_host(host):
                self.log.warning(
                    "Host '%s' not supplied in approved format. Removing host from Server Pool",
                    host)
                # skip only the offending host; the remaining hosts are
                # still eligible for the pool
                continue
            server = self.create_ldap_server_obj(host)
            server_pool.add(server)
            conn_servers.extend([host])

        # verify ldap connection object parameters are defined
        if len(server_pool.servers) < 1:
            self.log.error(
                "No hosts provided. ldap connection requires at least 1 host to connect to."
            )
            return None
        if not self.bind_user_dn or self.bind_user_dn.strip() == '':
            self.log.error(
                "'bind_user_dn' config value undefined. requried for ldap connection"
            )
            return None
        if not self.bind_user_password or self.bind_user_password.strip() == '':
            self.log.error(
                "'bind_user_password' config value undefined. requried for ldap connection"
            )
            return None

        # verify ldap search object parameters are defined
        if not self.user_search_base or self.user_search_base.strip() == '':
            self.log.error(
                "'user_search_base' config value undefined. requried for ldap search"
            )
            return None
        if not self.user_search_filter or self.user_search_filter.strip() == '':
            self.log.error(
                "'user_search_filter' config value undefined. requried for ldap search"
            )
            return None

        # open ldap connection and authenticate
        self.log.debug("Attempting ldap connection to %s with user '%s'",
                       conn_servers, self.bind_user_dn)
        conn = self.ldap_connection(server_pool, self.bind_user_dn,
                                    self.bind_user_password)

        # proceed if connection has been established
        if not conn or not conn.bind():
            self.log.error(
                "Could not establish ldap connection to %s using '%s' and supplied bind_user_password.",
                conn_servers, self.bind_user_dn)
            return None
        self.log.debug(
            "Successfully established connection to %s with user '%s'",
            conn_servers, self.bind_user_dn)

        try:
            return self._authenticate_over_connection(
                conn, username, password, conn_servers)
        finally:
            # always release the connection, whatever the outcome
            conn.unbind()

    def _authenticate_over_connection(self, conn, username, password,
                                      conn_servers):
        """Search for ``username`` over a bound connection and verify its password.

        Returns the username when the user is found, passes the group filter
        and can bind with ``password``; None otherwise. The caller owns the
        connection and is responsible for unbinding it.
        """
        # compile list of permitted groups; allowed_groups may be configured
        # as a single string, which is treated as one group DN
        allowed_groups = self.allowed_groups
        if isinstance(allowed_groups, str):
            allowed_groups = [allowed_groups] if allowed_groups.strip() else []
        permitted_groups = list(allowed_groups)
        if self.allow_nested_groups:
            for group in allowed_groups:
                permitted_groups.extend(self.get_nested_groups(conn, group))

        # format user search filter
        auth_user_search_filter = self.user_search_filter.format(
            username=username)

        # search for authenticating user in ldap
        self.log.debug("Attempting LDAP search using search_filter '%s'.",
                       auth_user_search_filter)
        conn.search(search_base=self.user_search_base,
                    search_filter=auth_user_search_filter,
                    search_scope=ldap3.SUBTREE,
                    attributes=self.user_membership_attribute,
                    paged_size=2)

        # handle abnormal search results (the response may be None)
        response = conn.response or []
        if not response or 'attributes' not in response[0]:
            self.log.error("LDAP search '%s' found %i result(s).",
                           auth_user_search_filter, len(response))
            return None
        if len(response) > 1:
            self.log.error(
                "LDAP search '%s' found %i result(s). Please narrow search to 1 result.",
                auth_user_search_filter, len(response))
            return None
        self.log.debug("LDAP search '%s' found %i result(s).",
                       auth_user_search_filter, len(response))

        # copy response to var
        search_response = copy.deepcopy(response[0])

        # get authenticating user's ldap attributes
        auth_user_dn = search_response['dn']
        if not auth_user_dn or auth_user_dn.strip() == '':
            self.log.error(
                "Search results for user '%s' returned 'dn' attribute with undefined or null value.",
                username)
            return None
        self.log.debug(
            "Search results for user '%s' returned 'dn' attribute as '%s'",
            username, auth_user_dn)

        auth_user_memberships = search_response['attributes'].get(
            self.user_membership_attribute)
        if not auth_user_memberships:
            self.log.error(
                "Search results for user '%s' returned '%s' attribute with undefned or null value.",
                username, self.user_membership_attribute)
            return None
        self.log.debug(
            "Search results for user '%s' returned '%s' attribute as %s",
            username, self.user_membership_attribute, auth_user_memberships)

        # is authenticating user a member of permitted_groups
        allowed_memberships = list(
            set(auth_user_memberships).intersection(permitted_groups))
        if not allowed_memberships and self.filter_by_group:
            self.log.error(
                "User '%s' is not a member of any permitted groups %s",
                username, permitted_groups)
            return None

        self.log.debug(
            "User '%s' found in the following allowed ldap groups %s. Proceeding with authentication.",
            username, allowed_memberships)

        # rebind ldap connection with authenticating user and gather results
        conn.rebind(user=auth_user_dn, password=password)
        if not conn.bind():
            self.log.error(
                "Could not establish ldap connection to %s using '%s' and supplied password.",
                conn_servers, auth_user_dn)
            return None
        self.log.info(
            "User '%s' sucessfully authenticated against ldap server %r.",
            username, conn_servers)
        return username
class _Selection(DescriptionWidget, ValueWidget, CoreWidget):
    """Base class for Selection widgets

    ``options`` may be given as a list of values, a list of (label, value)
    tuples, or a dict of {label: value}. Labels are the strings shown in the
    UI for the corresponding Python values and should be unique; when they
    are not given they are generated from the values.

    Setting the value programmatically triggers a reverse lookup among the
    options to check that the value is valid. The lookup compares with the
    equality operator unless another predicate is supplied through the
    ``equals`` keyword argument (for example ``equals=np.array_equal`` when
    the options are numpy arrays).
    """

    value = Any(None, help="Selected value", allow_none=True)
    label = Unicode(None, help="Selected label", allow_none=True)
    index = Int(None, help="Selected index", allow_none=True).tag(sync=True)

    options = Any(
        (),
        help="""Iterable of values, (label, value) pairs, or a mapping of {label: value} pairs that the user can select.
    Any assigned value is converted to a tuple of ('label', value) pairs.
    The labels are the strings that will be displayed in the UI, representing the
    actual Python choices, and should be unique.
    """)

    # Declared read-only, so it is only ever written through set_trait().
    _options_labels = Tuple(read_only=True,
                            help="The labels for the options.").tag(sync=True)

    disabled = Bool(help="Enable or disable user changes").tag(sync=True)

    def __init__(self, *args, **kwargs):
        """Set up the option bookkeeping, then initialise the traits.

        The ``equals`` keyword (default: ``==``) is consumed here; every
        other argument is forwarded to the parent constructor. When none of
        ``index``, ``value`` or ``label`` is given, the first option (if any)
        is selected.
        """
        self.equals = kwargs.pop('equals', lambda x, y: x == y)

        # Labels and values must be in place before the first validators
        # run, otherwise they would fail during construction.
        self._initializing_traits_ = True
        pairs = _make_options(kwargs.get('options', ()))
        self.set_trait('_options_labels', tuple(pair[0] for pair in pairs))
        self._options_values = tuple(pair[1] for pair in pairs)

        # Default selection: first item when there is one, nothing otherwise.
        if not any(key in kwargs for key in ('index', 'value', 'label')):
            if len(pairs) > 0:
                kwargs['index'] = 0
                kwargs['label'], kwargs['value'] = pairs[0]
            else:
                kwargs['index'] = None
                kwargs['label'], kwargs['value'] = None, None

        super().__init__(*args, **kwargs)
        self._initializing_traits_ = False

    @validate('options')
    def _validate_options(self, proposal):
        """Normalise the proposed options through ``_make_options``."""
        return _make_options(proposal.value)

    @observe('options')
    def _propagate_options(self, change):
        "Refresh labels/values and reset the selection unless initializing"
        new_options = change.new
        self.set_trait('_options_labels',
                       tuple(pair[0] for pair in new_options))
        self._options_values = tuple(pair[1] for pair in new_options)
        if self._initializing_traits_ is not True:
            if len(new_options) > 0:
                self.index = 0
            else:
                self.index = None

    @validate('index')
    def _validate_index(self, proposal):
        """Accept ``None`` or an index inside the current options."""
        candidate = proposal.value
        if candidate is None or 0 <= candidate < len(self._options_labels):
            return candidate
        raise TraitError('Invalid selection: index out of bounds')

    @observe('index')
    def _propagate_index(self, change):
        "Mirror a change of index onto the label and value traits"
        position = change.new
        if position is None:
            label = None
            value = None
        else:
            label = self._options_labels[position]
            value = self._options_values[position]
        if self.label is not label:
            self.label = label
        if self.value is not value:
            self.value = value

    @validate('value')
    def _validate_value(self, proposal):
        """Look the proposed value up among the option values."""
        candidate = proposal.value
        if candidate is None:
            return None
        try:
            return findvalue(self._options_values, candidate, self.equals)
        except ValueError:
            raise TraitError('Invalid selection: value not found')

    @observe('value')
    def _propagate_value(self, change):
        """Keep ``index`` in step with a changed ``value``."""
        selected = change.new
        if selected is None:
            index = None
        else:
            index = self._options_values.index(selected)
        if self.index != index:
            self.index = index

    @validate('label')
    def _validate_label(self, proposal):
        """Accept ``None`` or a label present in the current options."""
        candidate = proposal.value
        if candidate is None or candidate in self._options_labels:
            return candidate
        raise TraitError('Invalid selection: label not found')

    @observe('label')
    def _propagate_label(self, change):
        """Keep ``index`` in step with a changed ``label``."""
        selected = change.new
        if selected is None:
            index = None
        else:
            index = self._options_labels.index(selected)
        if self.index != index:
            self.index = index

    def _repr_keys(self):
        """Yield the keys shown in the repr, in sorted order.

        ``options`` is added by hand because it is not a synced trait, and
        ``index`` is left out when it is 0, the default whenever options
        exist.
        """
        inherited = super()._repr_keys()
        for key in sorted(chain(inherited, ('options', ))):
            if not (key == 'index' and self.index == 0):
                yield key
class NamespacedResourceReflector(LoggingConfigurable):
    """
    Base class for keeping a local up-to-date copy of a set of kubernetes resources.

    Must be subclassed once per kind of resource that needs watching.

    A subclass sets `list_method_name` (and `api_group_name` when the
    resource does not live in the core API group). Creating an instance
    loads the kubernetes configuration, does one blocking list call and then
    starts a daemon thread that keeps `resources` in sync through a watch.
    """
    labels = Dict(
        {},
        config=True,
        help="""
        Labels to reflect onto local cache
        """
    )

    fields = Dict(
        {},
        config=True,
        help="""
        Fields to restrict the reflected objects
        """
    )

    namespace = Unicode(
        None,
        allow_none=True,
        help="""
        Namespace to watch for resources in
        """
    )

    resources = Dict(
        {},
        help="""
        Dictionary of resource names to the appropriate resource objects.

        This can be accessed across threads safely.
        """
    )

    kind = Unicode(
        'resource',
        help="""
        Human readable name for kind of object we're watching for.

        Used for diagnostic messages.
        """
    )

    list_method_name = Unicode(
        "",
        help="""
        Name of function (on apigroup respresented by `api_group_name`) that is to be called to list resources.

        This will be passed a namespace & a label selector. You most likely want something
        of the form list_namespaced_<resource> - for example, `list_namespaced_pod` will
        give you a PodReflector.

        This must be set by a subclass.
        """
    )

    api_group_name = Unicode(
        'CoreV1Api',
        help="""
        Name of class that represents the apigroup on which `list_method_name` is to be found.

        Defaults to CoreV1Api, which has everything in the 'core' API group. If you want to watch Ingresses,
        for example, you would have to use ExtensionsV1beta1Api
        """
    )

    request_timeout = Int(
        0,
        config=True,
        help="""
        Network timeout for kubernetes watch.

        Trigger watch reconnect when no traffic has been received for this time.
        This can be used to restart the watch periodically.
        """
    )

    timeout_seconds = Int(
        10,
        config=True,
        help="""
        Timeout for kubernetes watch.

        Trigger watch reconnect when no watch event has been received.
        This will cause a full reload of the currently existing resources
        from the API server.
        """
    )

    on_failure = Any(
        help="""Function to be called when the reflector gives up.""")

    def __init__(self, *args, **kwargs):
        """
        Load the kubernetes configuration, build the selectors and start watching.

        All arguments are forwarded to the parent class constructor.
        """
        super().__init__(*args, **kwargs)
        # Load kubernetes config here, since this is a Singleton and
        # so this __init__ will be run way before anything else gets run.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()
        self.api = shared_client(self.api_group_name)

        # FIXME: Protect against malicious labels?
        self.label_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.labels.items()])
        self.field_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.fields.items()])

        self.first_load_future = Future()
        self._stop_event = threading.Event()

        self.start()

    def __del__(self):
        # __init__ can fail (e.g. while loading the kubernetes config) before
        # the stop event exists; in that case there is no watcher to stop and
        # calling stop() would only raise AttributeError during finalization.
        if getattr(self, '_stop_event', None) is not None:
            self.stop()

    def _list_and_update(self):
        """
        Update current list of resources by doing a full fetch.

        Overwrites all current resource info.

        Returns the resource version reported by the list call, so that a
        watch can be started from that point.
        """
        initial_resources = getattr(self.api, self.list_method_name)(
            self.namespace,
            label_selector=self.label_selector,
            field_selector=self.field_selector,
            _request_timeout=self.request_timeout,
        )
        # This is an atomic operation on the dictionary!
        self.resources = {p.metadata.name: p for p in initial_resources.items}
        # return the resource version so we can hook up a watch
        return initial_resources.metadata.resource_version

    def _watch_and_update(self):
        """
        Keeps the current list of resources up-to-date

        This method is to be run not on the main thread!

        We first fetch the list of current resources, and store that. Then we
        register to be notified of changes to those resources, and keep our
        local store up-to-date based on these notifications.

        We also perform exponential backoff: the retry delay starts at 0.1s
        and doubles after every failure, and we give up (calling `on_failure`
        if it is set) once the doubled delay exceeds 30s. This should protect
        against network connections dropping and intermittent unavailability
        of the api-server. Every time we recover from an exception we also do
        a full fetch, to pick up changes that might've been missed in the time
        we were not doing a watch.

        Note that we're playing a bit with fire here, by updating a dictionary
        in this thread while it is probably being read in another thread
        without using locks! However, dictionary access itself is atomic,
        and as long as we don't try to mutate them (do a 'fetch / modify /
        update' cycle on them), we should be ok!
        """
        cur_delay = 0.1

        self.log.info(
            "watching for %s with label selector %s / field selector %s in namespace %s",
            self.kind, self.label_selector, self.field_selector,
            self.namespace)
        while True:
            w = watch.Watch()
            try:
                resource_version = self._list_and_update()
                if not self.first_load_future.done():
                    # signal that we've loaded our initial data
                    self.first_load_future.set_result(None)
                watch_args = {
                    'namespace': self.namespace,
                    'label_selector': self.label_selector,
                    'field_selector': self.field_selector,
                    'resource_version': resource_version,
                }
                if self.request_timeout:
                    # set network receive timeout
                    watch_args['_request_timeout'] = self.request_timeout
                if self.timeout_seconds:
                    # set watch timeout
                    watch_args['timeout_seconds'] = self.timeout_seconds
                # in case of timeout_seconds, the w.stream just exits (no exception thrown)
                # -> we stop the watcher and start a new one
                for ev in w.stream(getattr(self.api, self.list_method_name),
                                   **watch_args):
                    # a successfully received event resets the backoff
                    cur_delay = 0.1
                    resource = ev['object']
                    if ev['type'] == 'DELETED':
                        # This is an atomic delete operation on the dictionary!
                        self.resources.pop(resource.metadata.name, None)
                    else:
                        # This is an atomic operation on the dictionary!
                        self.resources[resource.metadata.name] = resource
                    if self._stop_event.is_set():
                        break
            except ReadTimeoutError:
                # network read time out, just continue and restart the watch
                continue
            except Exception:
                cur_delay = cur_delay * 2
                if cur_delay > 30:
                    self.log.exception(
                        "Watching resources never recovered, giving up")
                    if self.on_failure:
                        self.on_failure()
                    return
                self.log.exception(
                    "Error when watching resources, retrying in %ss",
                    cur_delay)
                time.sleep(cur_delay)
                continue
            finally:
                # Runs on every pass, including after `continue`/`return`
                # above: release the watch and leave the loop once a stop
                # has been requested.
                w.stop()
                if self._stop_event.is_set():
                    self.log.info("%s watcher stopped", self.kind)
                    break

    def start(self):
        """
        Start the reflection process!

        We'll do a blocking read of all resources first, so that we don't
        race with any operations that are checking the state of the pod
        store - such as polls. This should be called only once at the
        start of program initialization (when the singleton is being created),
        and not afterwards!

        Raises ValueError if the watch thread has already been created.
        """
        if hasattr(self, 'watch_thread'):
            raise ValueError(
                'Thread watching for resources is already running')

        self._list_and_update()
        self.watch_thread = threading.Thread(target=self._watch_and_update)
        # If the watch_thread is only thread left alive, exit app
        self.watch_thread.daemon = True
        self.watch_thread.start()

    def stop(self):
        """Ask the watch thread to exit; it checks the flag between events."""
        self._stop_event.set()

    def stopped(self):
        """Return True once `stop` has been requested."""
        return self._stop_event.is_set()
class DataGenerator(Configurable):
    """
    Turn tab-separated streams of term-id lists into padded numpy batches.

    Each generator method reads text lines from an iterable, pads or
    truncates the query to `max_q_len` and the documents to `max_d_len`
    (padding value -1), and yields `(X, Y)` where `X` is a dict keyed by
    `q_name`, `d_name`, `idf_name` (and `aux_d_name` for pairwise data).
    """
    title_in = Unicode(
        '/bos/data1/sogou16/data/training/1m_title.pad_t50',
        help='titles term id csv, must be padded').tag(config=True)
    max_q_len = Int(10, help='max q len').tag(config=True)
    max_d_len = Int(50, help='max document len').tag(config=True)
    q_name = Unicode('q')
    d_name = Unicode('d')
    aux_d_name = Unicode('d_aux')
    idf_name = Unicode('idf')
    neg_sample = Int(1, help='negative sample').tag(config=True)
    # The attribute keeps its historical spelling because it is part of the
    # configuration interface. It was tagged `conf=True`, which traitlets does
    # not treat as configurable; `config=True` is the metadata key it reads.
    load_litle_pool = Bool(
        False, help='load little pool at beginning').tag(config=True)
    min_score_diff = Float(
        0, help='min score difference for click data generated pairs').tag(
            config=True)
    vocabulary_size = Int(2000000).tag(config=True)

    def __init__(self, **kwargs):
        """Forward `kwargs` to the parent class and optionally load the title pool."""
        super(DataGenerator, self).__init__(**kwargs)
        # TODO check this: a 0-d placeholder until _load_title_pool() runs.
        self.m_title_pool = np.array(None)
        if self.load_litle_pool and self.neg_sample:
            self._load_title_pool()
        print("min_score_diff: ", self.min_score_diff)
        print("generator's vocabulary size: ", self.vocabulary_size)

    def _load_title_pool(self):
        """Read the comma-separated integer file at `title_in` into `m_title_pool`."""
        if self.title_in:
            logging.info('start loading title pool [%s]', self.title_in)
            self.m_title_pool = genfromtxt(
                self.title_in,
                delimiter=',',
                dtype=int,
            )
            logging.info('loaded [%d] title pool', self.m_title_pool.shape[0])

    @staticmethod
    def _pad(values, length, fill):
        """Return a float vector of `length` holding `fill`, overwritten from the left by `values` (truncated to `length`)."""
        padded = np.full(length, fill, dtype=float)
        n_keep = min(len(values), length)
        padded[:n_keep] = values[:n_keep]
        return padded

    def _term_ids(self, field):
        """Parse a comma-separated id list, dropping ids >= `vocabulary_size`."""
        ids = (int(t) for t in field.split(','))
        return np.array([i for i in ids if i < self.vocabulary_size])

    def _make_batch(self, l_q, l_d, l_idf, l_y, with_idf, l_d_aux=None):
        """Stack the accumulated rows into the `(X, Y)` pair that the generators yield."""
        X = {
            self.q_name: np.array(l_q, dtype=int),
            self.d_name: np.array(l_d, dtype=int),
            self.idf_name: np.array(l_idf, dtype=float) if with_idf else None,
        }
        if l_d_aux is not None:
            X[self.aux_d_name] = np.array(l_d_aux, dtype=int)
        return X, np.array(l_y, dtype=int)

    def pointwise_generate(self,
                           pair_stream,
                           batch_size,
                           with_label=True,
                           with_idf=False):
        """
        to use: initial the generator = ClickDataGenerator(config=conf)
        and then for X,Y in generator.pointwise_generator(pair_stream, batch_size)

        :param pair_stream: the (probably infinite) stream of
            query <TAB> clicked url lines, e.g. itertools.cycle(open(file))
        :param batch_size: int, a batch size
        :param with_label: if True, the last column of each line is an
            integer label; otherwise every label is 0
        :param with_idf: if True, the third column holds the query term idf
            values; when that column is missing the idf defaults to ones.
            If False, no idf column is read and X[idf_name] is None.
        :return: yield a batched X and Y; the final batch may be smaller
            than batch_size
        """
        l_q, l_d, l_idf, l_y = [], [], [], []
        # A line can only carry an idf column if it has q, doc, idf and
        # (when labelled) a separate trailing label column.
        min_cols_with_idf = 4 if with_label else 3

        for line in pair_stream:
            cols = line.split('\t')
            q = np.array([int(t) for t in cols[0].split(',')])
            doc = np.array([int(t) for t in cols[1].split(',')])
            y = int(cols[-1]) if with_label else 0

            l_q.append(self._pad(q, self.max_q_len, -1))
            l_d.append(self._pad(doc, self.max_d_len, -1))
            l_y.append(y)

            if with_idf:
                if len(cols) >= min_cols_with_idf:
                    idf = np.array([float(t) for t in cols[2].split(',')])
                else:
                    idf = np.ones(len(q))
                # Pad by the idf vector's own length so that a length
                # mismatch with the query cannot break the assignment.
                l_idf.append(self._pad(idf, self.max_q_len, 0))

            if len(l_q) >= batch_size:
                yield self._make_batch(l_q, l_d, l_idf, l_y, with_idf)
                l_q, l_d, l_idf, l_y = [], [], [], []

        if l_q:
            yield self._make_batch(l_q, l_d, l_idf, l_y, with_idf)
        logging.info('point wise generator to an end')

    def pairwise_generate(self, pair_stream, batch_size, with_idf=False):
        """
        to use: initial the generator = ClickDataGenerator(config=conf)
        and then for X,Y in generator.pairwise_generate(pair_stream, batch_size)

        Every positive (query, document) row is repeated `neg_sample` times
        and paired with a row drawn at random from `m_title_pool`, so the
        title pool must have been loaded before iterating.

        :param pair_stream: the (probably infinite) stream of
            query <TAB> clicked url lines, e.g. itertools.cycle(open(file))
        :param batch_size: must be neg_sample * k size
        :param with_idf: if True, the third col in pair_stream is the query term idf
        :return: yield a batched X and Y
        NOTE: the Y is always 1, the order of pos and neg docs are not shuffled yet.
        """
        assert batch_size % self.neg_sample == 0
        # Integer division: the batch size is a row count, not a float.
        pos_batch_size = batch_size // self.neg_sample
        for pos_X, _ in self.pointwise_generate(pair_stream,
                                                pos_batch_size,
                                                with_label=False,
                                                with_idf=with_idf):
            new_Q = np.repeat(pos_X[self.q_name], self.neg_sample, axis=0)
            new_D = np.repeat(pos_X[self.d_name], self.neg_sample, axis=0)
            # Size the negatives and labels from the rows actually produced,
            # so a trailing partial batch stays row-aligned.
            n_rows = new_Q.shape[0]
            idx = np.random.randint(self.m_title_pool.shape[0], size=n_rows)
            aux_D = self.m_title_pool[idx, :]
            new_IDF = None
            if with_idf:
                new_IDF = np.repeat(pos_X[self.idf_name],
                                    self.neg_sample,
                                    axis=0)
            X = {
                self.q_name: new_Q,
                self.d_name: new_D,
                self.aux_d_name: aux_D,
                self.idf_name: new_IDF
            }
            Y = np.ones(n_rows)
            yield X, Y

    def pairwise_reader(self, pair_stream, batch_size, with_idf=False):
        """
        Yield batches from lines of the form q <TAB> doc1 <TAB> doc2 <TAB> score [<TAB> idf].

        Lines with fewer than four columns are skipped silently; lines with a
        blank column are printed and skipped; lines whose absolute score is
        below `min_score_diff` are skipped. Term ids that are not below
        `vocabulary_size` are dropped. When the score is not positive the two
        documents are swapped, so X[d_name] is always the preferred document
        and Y is always 1.

        :param pair_stream: iterable of text lines
        :param batch_size: number of rows per yielded batch
        :param with_idf: if True, read idf values from the fifth column
            (ones when that column is absent); otherwise X[idf_name] is None
        :return: yield a batched X and Y; the final batch may be smaller
            than batch_size
        """
        l_q, l_d, l_d_aux, l_idf, l_y = [], [], [], [], []

        for line in pair_stream:
            cols = line.strip().split('\t')
            if len(cols) < 4:
                continue
            if not all(col.strip() for col in cols):
                print(line)
                continue
            score = float(cols[3])
            if abs(score) < self.min_score_diff:
                continue

            q = self._term_ids(cols[0])
            t1 = self._term_ids(cols[1])
            t2 = self._term_ids(cols[2])
            if score <= 0:
                t1, t2 = t2, t1  # make the first always positive

            l_q.append(self._pad(q, self.max_q_len, -1))
            l_d.append(self._pad(t1, self.max_d_len, -1))
            l_d_aux.append(self._pad(t2, self.max_d_len, -1))
            l_y.append(1)

            if with_idf:
                if len(cols) < 5:
                    idf = np.ones(len(q))
                else:
                    idf = np.array([float(t) for t in cols[4].split(',')])
                l_idf.append(self._pad(idf, self.max_q_len, 0))

            if len(l_q) >= batch_size:
                yield self._make_batch(l_q, l_d, l_idf, l_y, with_idf,
                                       l_d_aux)
                l_q, l_d, l_d_aux, l_idf, l_y = [], [], [], [], []

        if l_q:
            yield self._make_batch(l_q, l_d, l_idf, l_y, with_idf, l_d_aux)
        logging.info('pair wise reader to an end')
class Map(DOMWidget, InteractMixin):
    """
    Leaflet map widget.

    Map and interaction options are declared as synced traits; those tagged
    ``o=True`` are collected into `options`. The widget keeps a tuple of
    `layers` and a tuple of `controls`, each validated so that a given
    layer/control (identified by its ``model_id``) appears at most once.
    """
    _view_name = Unicode('LeafletMapView').tag(sync=True)
    _model_name = Unicode('LeafletMapModel').tag(sync=True)
    _view_module = Unicode('jupyter-leaflet').tag(sync=True)
    _model_module = Unicode('jupyter-leaflet').tag(sync=True)

    _view_module_version = Unicode(EXTENSION_VERSION).tag(sync=True)
    _model_module_version = Unicode(EXTENSION_VERSION).tag(sync=True)

    # Map options
    center = List(def_loc).tag(sync=True, o=True)
    zoom_start = Int(12).tag(sync=True, o=True)
    zoom = Int(12).tag(sync=True, o=True)
    max_zoom = Int(18).tag(sync=True, o=True)
    min_zoom = Int(1).tag(sync=True, o=True)
    interpolation = Unicode('bilinear').tag(sync=True, o=True)

    # Specification of the basemap
    basemap = Dict(default_value=dict(
        url='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png',
        max_zoom=19,
        attribution=
        'Map data (c) <a href="https://openstreetmap.org">OpenStreetMap</a> contributors'
    )).tag(sync=True, o=True)
    modisdate = Unicode('yesterday').tag(sync=True)

    # Interaction options
    dragging = Bool(True).tag(sync=True, o=True)
    touch_zoom = Bool(True).tag(sync=True, o=True)
    scroll_wheel_zoom = Bool(False).tag(sync=True, o=True)
    double_click_zoom = Bool(True).tag(sync=True, o=True)
    box_zoom = Bool(True).tag(sync=True, o=True)
    tap = Bool(True).tag(sync=True, o=True)
    tap_tolerance = Int(15).tag(sync=True, o=True)
    world_copy_jump = Bool(False).tag(sync=True, o=True)
    close_popup_on_click = Bool(True).tag(sync=True, o=True)
    bounce_at_zoom_limits = Bool(True).tag(sync=True, o=True)
    keyboard = Bool(True).tag(sync=True, o=True)
    keyboard_pan_offset = Int(80).tag(sync=True, o=True)
    keyboard_zoom_offset = Int(1).tag(sync=True, o=True)
    inertia = Bool(True).tag(sync=True, o=True)
    inertia_deceleration = Int(3000).tag(sync=True, o=True)
    inertia_max_speed = Int(1500).tag(sync=True, o=True)
    # inertia_threshold = Int(?, o=True).tag(sync=True)
    zoom_control = Bool(True).tag(sync=True, o=True)
    attribution_control = Bool(True).tag(sync=True, o=True)
    # fade_animation = Bool(?).tag(sync=True, o=True)
    # zoom_animation = Bool(?).tag(sync=True, o=True)
    zoom_animation_threshold = Int(4).tag(sync=True, o=True)
    # marker_zoom_animation = Bool(?).tag(sync=True, o=True)

    options = List(trait=Unicode).tag(sync=True)

    @default('options')
    def _default_options(self):
        """Return the names of all traits tagged ``o=True``."""
        return list(self.traits(o=True))

    south = Float(def_loc[0], read_only=True).tag(sync=True)
    north = Float(def_loc[0], read_only=True).tag(sync=True)
    east = Float(def_loc[1], read_only=True).tag(sync=True)
    west = Float(def_loc[1], read_only=True).tag(sync=True)

    layers = Tuple(trait=Instance(Layer)).tag(sync=True,
                                              **widget_serialization)

    @default('layers')
    def _default_layers(self):
        """Start with a single base tile layer built from `basemap` and `modisdate`."""
        return (basemap_to_tiles(self.basemap, self.modisdate, base=True), )

    bounds = Tuple(read_only=True)
    bounds_polygon = Tuple(read_only=True)

    @observe('south', 'north', 'east', 'west')
    def _observe_bounds(self, change):
        """Recompute `bounds` and `bounds_polygon` whenever an edge changes."""
        self.set_trait('bounds',
                       ((self.south, self.west), (self.north, self.east)))
        self.set_trait('bounds_polygon',
                       ((self.north, self.west), (self.north, self.east),
                        (self.south, self.east), (self.south, self.west)))

    def __init__(self, **kwargs):
        """Create the widget and register the display and message callbacks."""
        super(Map, self).__init__(**kwargs)
        self.on_displayed(self._fire_children_displayed)
        self.on_msg(self._handle_leaflet_event)

    def _fire_children_displayed(self, widget, **kwargs):
        """Forward the 'displayed' notification to every layer and control."""
        for layer in self.layers:
            layer._handle_displayed(**kwargs)
        for control in self.controls:
            control._handle_displayed(**kwargs)

    # model ids of the layers currently on the map, in `layers` order
    _layer_ids = List()

    @validate('layers')
    def _validate_layers(self, proposal):
        '''Validate layers list.

        Makes sure only one instance of any given layer can exist in the
        layers list. Raises LayerException on a duplicate.
        '''
        layer_ids = [l.model_id for l in proposal.value]
        if len(set(layer_ids)) != len(layer_ids):
            raise LayerException(
                'duplicate layer detected, only use each layer once')
        # Only record the ids once the proposal is accepted; otherwise a
        # rejected assignment would leave the bookkeeping out of step with
        # the layers that are actually on the map.
        self._layer_ids = layer_ids
        return proposal.value

    def add_layer(self, layer):
        """Append `layer` to the map; a dict is first converted with basemap_to_tiles.

        Raises LayerException if the layer is already on the map.
        """
        if isinstance(layer, dict):
            layer = basemap_to_tiles(layer)
        if layer.model_id in self._layer_ids:
            raise LayerException('layer already on map: %r' % layer)
        self.layers = tuple(self.layers) + (layer, )

    def remove_layer(self, layer):
        """Remove `layer` from the map; raises LayerException if it is not on it."""
        if layer.model_id not in self._layer_ids:
            raise LayerException('layer not on map: %r' % layer)
        self.layers = tuple(
            [l for l in self.layers if l.model_id != layer.model_id])

    def substitute_layer(self, old, new):
        """Replace `old` by `new` in place; a dict `new` is converted with basemap_to_tiles.

        Raises LayerException if `old` is not on the map.
        """
        if isinstance(new, dict):
            new = basemap_to_tiles(new)
        if old.model_id not in self._layer_ids:
            raise LayerException(
                'Could not substitute layer: layer not on map.')
        self.layers = tuple(
            [new if l.model_id == old.model_id else l for l in self.layers])

    def clear_layers(self):
        """Remove every layer from the map."""
        self.layers = ()

    controls = Tuple(trait=Instance(Control)).tag(sync=True,
                                                  **widget_serialization)
    # model ids of the controls currently on the map, in `controls` order
    _control_ids = List()

    @validate('controls')
    def _validate_controls(self, proposal):
        '''Validate controls list.

        Makes sure only one instance of any given control can exist in the
        controls list. Raises ControlException on a duplicate.
        '''
        control_ids = [c.model_id for c in proposal.value]
        if len(set(control_ids)) != len(control_ids):
            raise ControlException(
                'duplicate control detected, only use each control once')
        # Same reasoning as for layers: commit the ids only on success.
        self._control_ids = control_ids
        return proposal.value

    def add_control(self, control):
        """Append `control`; raises ControlException if it is already on the map."""
        if control.model_id in self._control_ids:
            raise ControlException('control already on map: %r' % control)
        self.controls = tuple(self.controls) + (control, )

    def remove_control(self, control):
        """Remove `control`; raises ControlException if it is not on the map."""
        if control.model_id not in self._control_ids:
            raise ControlException('control not on map: %r' % control)
        self.controls = tuple(
            [c for c in self.controls if c.model_id != control.model_id])

    def clear_controls(self):
        """Remove every control from the map."""
        self.controls = ()

    def __iadd__(self, item):
        """``map += item`` adds a Layer or Control; other types are ignored."""
        if isinstance(item, Layer):
            self.add_layer(item)
        elif isinstance(item, Control):
            self.add_control(item)
        return self

    def __isub__(self, item):
        """``map -= item`` removes a Layer or Control; other types are ignored."""
        if isinstance(item, Layer):
            self.remove_layer(item)
        elif isinstance(item, Control):
            self.remove_control(item)
        return self

    def __add__(self, item):
        """``map + item`` adds the item to THIS map and returns it.

        NOTE: unlike a conventional ``+`` this mutates the map in place
        (it behaves exactly like ``+=``).
        """
        if isinstance(item, Layer):
            self.add_layer(item)
        elif isinstance(item, Control):
            self.add_control(item)
        return self

    # Event handling
    _interaction_callbacks = Instance(CallbackDispatcher, ())

    def _handle_leaflet_event(self, _, content, buffers):
        """Dispatch front-end messages whose 'event' field is 'interaction'."""
        if content.get('event', '') == 'interaction':
            self._interaction_callbacks(**content)

    def on_interaction(self, callback, remove=False):
        """Register (or, with ``remove=True``, unregister) an interaction callback."""
        self._interaction_callbacks.register_callback(callback, remove=remove)
class DisplayDL1Calib(Tool):
    """
    Tool that reads events, calibrates each one and plots the resulting images.

    `setup` builds the reader, calibrator and plotter from the tool's config;
    `start` loops over the events; `finish` finalises the plotter.
    """
    name = "DisplayDL1Calib"
    description = "Calibrate dl0 data to dl1, and plot the photoelectron " \
                  "images."

    telescope = Int(None,
                    allow_none=True,
                    help='Telescope to view. Set to None to display all '
                    'telescopes.').tag(config=True)

    aliases = Dict(
        dict(f='EventFileReaderFactory.input_path',
             r='EventFileReaderFactory.reader',
             max_events='EventFileReaderFactory.max_events',
             extractor='ChargeExtractorFactory.extractor',
             window_width='ChargeExtractorFactory.window_width',
             t0='ChargeExtractorFactory.t0',
             window_shift='ChargeExtractorFactory.window_shift',
             sig_amp_cut_HG='ChargeExtractorFactory.sig_amp_cut_HG',
             sig_amp_cut_LG='ChargeExtractorFactory.sig_amp_cut_LG',
             lwt='ChargeExtractorFactory.lwt',
             clip_amplitude='CameraDL1Calibrator.clip_amplitude',
             T='DisplayDL1Calib.telescope',
             O='ImagePlotter.output_path'))
    flags = Dict(
        dict(D=({
            'ImagePlotter': {
                'display': True
            }
        }, "Display the photoelectron images on-screen as they "
                "are produced.")))
    classes = List([
        EventFileReaderFactory, ChargeExtractorFactory, CameraDL1Calibrator,
        ImagePlotter
    ])

    def __init__(self, **kwargs):
        """Create the tool; the components are built later by `setup`."""
        super().__init__(**kwargs)
        self.reader = None
        self.calibrator = None
        self.plotter = None

    def setup(self):
        """Instantiate the event reader, the calibrator and the image plotter."""
        self.log_format = "%(levelname)s: %(message)s [%(name)s.%(funcName)s]"
        kwargs = dict(config=self.config, tool=self)

        reader_factory = EventFileReaderFactory(**kwargs)
        reader_class = reader_factory.get_class()
        self.reader = reader_class(**kwargs)

        self.calibrator = CameraCalibrator(origin=self.reader.origin,
                                           **kwargs)

        self.plotter = ImagePlotter(**kwargs)

    def start(self):
        """Calibrate every event and plot it for the selected telescope(s)."""
        source = self.reader.read()
        for event in source:
            self.calibrator.calibrate(event)

            tel_list = event.r0.tels_with_data

            # `None` is the documented "all telescopes" value; compare against
            # it explicitly so that a telescope id of 0 is still a selection.
            if self.telescope is not None:
                if self.telescope not in tel_list:
                    continue
                tel_list = [self.telescope]
            for telid in tel_list:
                self.plotter.plot(event, telid)

    def finish(self):
        """Let the plotter finalise its output."""
        self.plotter.finish()
class ChargeResolutionGenerator(Tool):
    """
    Tool that loops over the runs listed in ``<input_path>/runlist.txt``.

    In its current state `start` only counts the events found in each run
    file and plots that count, divided by the run-list entry in column 3,
    against the run-list entry in column 2. The calibration and
    charge-accumulation steps are disabled (see the TODO in `start`).
    """
    name = "ChargeResolutionGenerator"
    description = "Generate the a pickle file of ChargeResolutionFile for " \
                  "either MC or data files."

    telescopes = Int(1,
                     help='Telescopes to include from the event file. '
                     'Default = 1').tag(config=True)
    output_name = Unicode('charge_resolution',
                          help='Name of the output charge resolution hdf5 '
                          'file').tag(config=True)
    input_path = Unicode(help='Path to directory containing data').tag(
        config=True)

    max_events = Int(1,
                     help='Maximum number of events to use').tag(config=True)

    # The description must be passed as `help=`: the second positional
    # parameter of a trait is `allow_none`, not the help text.
    plot_cam = Bool(
        False, help="enable plotting of individual camera").tag(config=True)

    use_true_pe = Bool(False, help="Use true mc p.e.").tag(config=True)

    calibrator = Unicode(
        'HESSIOR1Calibrator',
        help='which calibrator to use, default = HESSIOR1Calibrator').tag(
            config=True)

    aliases = Dict(
        dict(input_path='ChargeResolutionGenerator.input_path',
             calibrator='ChargeResolutionGenerator.calibrator',
             max_events='ChargeResolutionGenerator.max_events',
             extractor='ChargeExtractorFactory.product',
             window_width='ChargeExtractorFactory.window_width',
             t0='ChargeExtractorFactory.t0',
             window_shift='ChargeExtractorFactory.window_shift',
             sig_amp_cut_HG='ChargeExtractorFactory.sig_amp_cut_HG',
             sig_amp_cut_LG='ChargeExtractorFactory.sig_amp_cut_LG',
             lwt='ChargeExtractorFactory.lwt',
             clip_amplitude='CameraDL1Calibrator.clip_amplitude',
             radius='CameraDL1Calibrator.radius',
             max_pe='ChargeResolutionCalculator.max_pe',
             T='ChargeResolutionGenerator.telescopes',
             o='ChargeResolutionGenerator.output_name',
             plot_cam='ChargeResolutionGenerator.plot_cam',
             use_true_pe='ChargeResolutionGenerator.use_true_pe'))
    classes = List([
        EventSourceFactory, HESSIOEventSource, TargetIOEventSource,
        ChargeExtractorFactory, CameraDL1Calibrator,
        ChargeResolutionCalculator, CameraCalibrator
    ])

    def __init__(self, **kwargs):
        """Create the tool; the processing components are built by `setup`."""
        super().__init__(**kwargs)
        self.eventsource = None
        self.r1 = None
        self.dl0 = None
        self.dl1 = None
        self.calculator = None
        self.cal = None

    def setup(self):
        """Instantiate the reducer, the calibrators and the resolution calculator."""
        kwargs = dict(config=self.config, tool=self)

        self.dl0 = CameraDL0Reducer(**kwargs)
        self.dl1 = CameraDL1Calibrator(**kwargs)
        self.cal = CameraCalibrator(r1_product=self.calibrator)
        self.calculator = ChargeResolutionCalculator(**kwargs)

    def start(self):
        """
        Count the events of every run in the run list and plot the result.

        Reads ``<input_path>/runlist.txt`` column-wise: column 0 is the run
        number, column 2 is used as the x value of the plot and column 3 as
        the denominator of the plotted ratio. A run whose file is missing is
        reported and contributes a count of 0.

        Raises ValueError when `calibrator` is not one of the two names for
        which a file-name pattern is known.
        """
        run_list = np.loadtxt('%s/runlist.txt' % self.input_path, unpack=True)
        trig_eff = []
        n_pe = []
        # NOTE(review): `debug` is not defined in this class; presumably a
        # module-level flag - confirm.
        if debug:
            fig = plt.figure(1)
            fig.add_subplot(111)
        for n, run in enumerate(run_list[0]):
            n_pe.append(run_list[2][n])
            # TODO remove need for hardcoded file name
            if self.calibrator == "TargetIOR1Calibrator":
                file_name = "%s/Run%05d_r1.tio" % (self.input_path, int(run))
            elif self.calibrator == "HESSIOR1Calibrator":
                file_name = "%s/Run%05d_mc.simtel.gz" % (self.input_path,
                                                         int(run))
            else:
                # Without this branch `file_name` would be unbound below.
                raise ValueError(
                    'no input file pattern known for calibrator %r' %
                    self.calibrator)
            print(file_name)

            # Reset per run so a missing file cannot reuse the previous
            # run's count (or fail on an unbound name for the first run).
            n_trig = 0
            try:
                source = EventSourceFactory.produce(
                    input_url=file_name, max_events=self.max_events)
                for event in tqdm(source):
                    n_trig += 1
                    # TODO: the per-event processing is currently disabled.
                    # It used to call self.cal.calibrate(event),
                    # self.dl0.reduce(event), self.dl1.calibrate(event),
                    # optionally display the camera image when
                    # self.plot_cam is set, and feed true/measured charges
                    # to self.calculator.add_charges(...).
            except FileNotFoundError:
                print('file_not_found')
            trig_eff.append(n_trig / run_list[3][n])

        plt.plot(n_pe, trig_eff)
        plt.show()
        if debug:
            plt.xscale('log')
            plt.yscale('log')
            plt.plot([0, 1000], [0, 1000], 'k:')
            plt.xlabel('Input p.e.')
            plt.ylabel('True mc p.e.')
            plt.show()

    def finish(self):
        """Save the calculator's results under `output_name`."""
        self.calculator.save(self.output_name)
class Paperboy(Application):
    """Base class for paperboy applications

    Holds the server, URL, auth, storage, scheduler and output settings as
    traits; `start` wires the storage/auth/scheduler back ends according to
    those settings and then runs the Falcon application under gunicorn.
    """
    name = 'paperboy'
    description = 'paperboy'

    ############
    # Gunicorn #
    ############
    workers = Int(default_value=1,
                  help="Number of gunicorn workers").tag(config=True)
    port = Unicode(default_value='8080',
                   help="Port to run on").tag(config=True)

    ##########
    # Falcon #
    ##########
    api = Instance(falcon.API, help="A Falcon API instance").tag(config=True)

    ########
    # URLs #
    ########
    baseurl = Unicode(default_value='/',
                      help="Base URL (for reverse proxies)").tag(config=True)
    apiurl = Unicode(
        default_value='/api/v1/',
        help="API base URL (for reverse proxies)").tag(config=True)
    loginurl = Unicode(default_value='login',
                       help="login url").tag(config=True)
    logouturl = Unicode(default_value='logout',
                        help="logout url").tag(config=True)
    registerurl = Unicode(default_value='register',
                          help="register url").tag(config=True)

    ########
    # Auth #
    ########
    http = Bool(
        default_value=True,
        help="Running on HTTP (as opposed to https, so token is insecure)"
    ).tag(config=True)
    include_password = Bool(default_value=False).tag(config=True)
    include_register = Bool(default_value=True).tag(config=True)
    token_timeout = Int(default_value=600).tag(config=True)

    ##########
    # Config #
    ##########
    # FIXME doesnt allow default_value yet
    user_config = UserConfig
    notebook_config = NotebookConfig
    job_config = JobConfig
    report_config = ReportConfig

    ##############
    # Middleware #
    ##############
    essential_middleware = [
        CORSMiddleware(allow_all_origins=True).middleware,
        MultipartMiddleware()
    ]
    extra_middleware = List(
        default_value=[])  # List of extra middlewares to install
    auth_required_middleware = Instance(object)
    load_user_middleware = Instance(object)

    ##################
    # Custom handler #
    ##################
    extra_handlers = List(
        trait=Tuple(), default_value=[]
    )  # List of tuples (route, handler) of handlers to install

    ##########################################
    #        Predefined Configurations       #
    ##########################################
    backend = Unicode(
        default_value='dummy',
        help="Backend set to use, options are {sqla, custom}").tag(config=True)
    scheduler = Unicode(
        default_value='dummy',
        help="Scheduler type to use, options are {dummy, airflow, luigi}").tag(
            config=True)
    auth = Unicode(
        default_value='dummy',
        help=
        "Authentication backend set to use, options are {none, sqla, custom}"
    ).tag(config=True)
    secret = Unicode()

    @validate('backend')
    def _validate_backend(self, proposed):
        """Accept only the known backend names; raise TraitError otherwise."""
        if proposed['value'] not in (
                'custom',
                'dummy',
                'git',
                'sqla',
        ):
            raise TraitError('backend not recognized: {}'.format(
                proposed['value']))
        return proposed['value']

    @validate('auth')
    def _validate_auth(self, proposed):
        """Accept only the known auth names; raise TraitError otherwise."""
        if proposed['value'] not in (
                'custom',
                'none',
                'sqla',
        ):
            # The message used to say "backend", which pointed at the wrong
            # setting.
            raise TraitError('auth not recognized: {}'.format(
                proposed['value']))
        return proposed['value']

    ###########
    # Storage #
    ###########
    # FIXME doesnt allow default_value yet
    storage = SQLAStorageConfig()
    dev = Bool(default_value=False)

    #############
    # Scheduler #
    #############
    # FIXME doesnt allow default_value yet
    scheduler_config = Instance(klass=SchedulerConfig, args=(), kwargs={})

    ##########
    # Output #
    ##########
    output = Instance(klass=LocalOutputConfig, args=(), kwargs={})

    def _use_sql_storage(self):
        """Install the SQL session middleware and point every storage slot at its SQL implementation."""
        self.extra_middleware = self.extra_middleware + [
            SQLAlchemySessionMiddleware(self.storage.sessionmaker)
        ]
        self.storage.notebook_storage = NotebookSQLStorage
        self.storage.job_storage = JobSQLStorage
        self.storage.report_storage = ReportSQLStorage
        self.storage.user_storage = UserSQLStorage
        self.storage.sql_user = True

    def start(self):
        """Start the whole thing

        Reads the ``PORT`` environment variable (falling back to `port`),
        generates a fresh `secret`, configures storage, auth and scheduler
        and finally runs the server. With `dev` set, an in-memory SQLite
        database, SQL auth and the dummy scheduler are used regardless of
        the other settings.

        Raises NotImplementedError for the 'git' backend and Exception for
        an unknown scheduler name.
        """
        self.port = os.environ.get('PORT', self.port)
        options = {
            'bind': '0.0.0.0:{}'.format(self.port),
            'workers': self.workers
        }

        self.secret = str(uuid4())

        if self.dev:
            # The engine is created from the storage config's URL, so that is
            # the value to override; the app-level attribute is kept as well
            # for any caller that reads it there.
            self.storage.sql_url = 'sqlite:///:memory:'
            self.sql_url = self.storage.sql_url
            logging.critical('Using SQL in memory backend')

            self.storage.engine = create_engine(self.storage.sql_url,
                                                echo=False)
            Base.metadata.create_all(self.storage.engine)

            # The middleware below reads the session factory from the
            # storage config, so it has to be stored there (as the non-dev
            # branch does); the app-level attribute is kept for
            # compatibility.
            self.storage.sessionmaker = sessionmaker(bind=self.storage.engine)
            self.sessionmaker = self.storage.sessionmaker
            self.backend = 'sqla'
            self.auth = 'sqla'
            self._use_sql_storage()

            logging.critical('Using SQL auth')
            self.auth_required_middleware = SQLAuthRequiredMiddleware
            self.load_user_middleware = SQLUserMiddleware

            logging.critical('Using Dummy scheduler')
            self.scheduler = 'dummy'
            self.scheduler_config = DummySchedulerConfig()

        else:
            # Preconfigured storage backends
            if self.backend == 'git':
                logging.critical('Using Git backend')
                raise NotImplementedError()

            # default to sqla
            # elif self.backend == 'sqla':
            else:
                logging.critical('Using SQL backend')

                self.storage.engine = create_engine(
                    os.environ.get('PAPERBOY_SQL_URL') or self.storage.sql_url,
                    echo=False)
                Base.metadata.create_all(self.storage.engine)

                self.storage.sessionmaker = sessionmaker(
                    bind=self.storage.engine)
                self._use_sql_storage()
                # NOTE: every backend that reaches this point forces SQL
                # auth, so the 'none' branch below is currently never taken.
                self.auth = 'sqla'

            # Preconfigured auth backends
            if self.auth == 'none':
                logging.critical('Using No auth')
                self.auth_required_middleware = NoAuthRequiredMiddleware
                self.load_user_middleware = NoUserMiddleware

            elif self.auth == 'sqla':
                logging.critical('Using SQL auth')
                self.auth_required_middleware = SQLAuthRequiredMiddleware
                self.load_user_middleware = SQLUserMiddleware

            if self.scheduler == 'dummy':
                logging.critical('Using dummy scheduler')
                self.scheduler_config = DummySchedulerConfig()

            elif self.scheduler == 'airflow':
                logging.critical('Using Airflow scheduler')
                self.scheduler_config = AirflowSchedulerConfig()

            elif self.scheduler == 'luigi':
                logging.critical('Using Luigi scheduler')
                self.scheduler_config = LuigiSchedulerConfig()

            elif self.scheduler == 'local':
                logging.critical('Using Local scheduler')
                self.scheduler_config = LocalSchedulerConfig()

            else:
                raise Exception('Must specify a valid scheduler!')

        FalconDeploy(FalconAPI(self), options).run()

    @classmethod
    def launch_instance(cls, argv=None, **kwargs):
        """Launch an instance of a Paperboy Application"""
        return super(Paperboy, cls).launch_instance(argv=argv, **kwargs)

    def to_dict(self):
        """Return the name, description, worker count and port as a plain dict."""
        return {
            'name': self.name,
            'description': self.description,
            'workers': self.workers,
            'port': self.port
        }

    aliases = {
        'workers': 'Paperboy.workers',
        'port': 'Paperboy.port',
        'baseurl': 'Paperboy.baseurl',
        'backend': 'Paperboy.backend',
        'scheduler': 'Paperboy.scheduler',
        'auth': 'Paperboy.auth',
        'sql_url': 'Paperboy.storage.sql_url',
    }


def _login_redirect(config, *args, **kwargs):
    """Redirect to the login page by raising falcon.HTTPFound.

    The target is `config.loginurl` joined onto `config.baseurl`; any extra
    positional or keyword arguments are ignored.
    """
    raise falcon.HTTPFound(urljoin(config.baseurl, config.loginurl))
class Envelope(AudioNode, ScheduleObserveMixin):
    """ADSR (attack, decay, sustain, release) envelope generator.

    Unless an ``_output`` node is supplied by the caller, the envelope
    creates a :class:`Signal` (units ``"normalRange"``) and uses it as its
    output, so the envelope can be connected wherever a :class:`Signal`
    is accepted.

    The ``trigger_*`` methods return ``self`` so that calls can be chained.
    """

    _model_name = Unicode("EnvelopeModel").tag(sync=True)

    # Envelope stage parameters.
    attack = Float(0.01, help="Envelope attack").tag(sync=True)
    decay = Float(0.1, help="Envelope decay").tag(sync=True)
    sustain = Float(1.0, help="Envelope sustain").tag(sync=True)
    release = Float(0.5, help="Envelope release").tag(sync=True)

    # Curve of each stage: attack and release accept either a named curve
    # or an explicit list of floats, decay only accepts a basic named curve.
    attack_curve = Union(
        [Enum(CURVES), List(Float())],
        default_value="linear",
    ).tag(sync=True)
    decay_curve = Enum(
        BASIC_CURVES,
        default_value="exponential",
    ).tag(sync=True)
    release_curve = Union(
        [Enum(CURVES), List(Float())],
        default_value="exponential",
    ).tag(sync=True)

    # Read-only envelope data and the settings controlling its synchronization.
    array = Array(
        allow_none=True,
        default_value=None,
        read_only=True,
        help="Envelope data",
    ).tag(sync=True, **data_array_serialization)
    array_length = Int(
        1024,
        help="Envelope data resolution (array length)",
    ).tag(sync=True)
    sync_array = Bool(
        False,
        help="If True, synchronize envelope data",
    ).tag(sync=True)

    _observable_traits = List(["value"])

    def __init__(self, **kwargs):
        """Create the envelope, adding a default output signal if needed.

        Parameters
        ----------
        **kwargs
            Forwarded to the parent constructor. When ``_output`` is absent,
            a new ``Signal(units="normalRange", _create_node=False)`` is
            inserted under that key before forwarding.
        """
        if "_output" not in kwargs:
            kwargs["_output"] = Signal(units="normalRange", _create_node=False)
        super().__init__(**kwargs)

    def trigger_attack(self, time=None, velocity=1):
        """Emit a ``triggerAttack`` event and return ``self``."""
        payload = {"time": time, "velocity": velocity}
        add_or_send_event("triggerAttack", self, payload)
        return self

    def trigger_release(self, time=None):
        """Emit a ``triggerRelease`` event and return ``self``."""
        payload = {"time": time}
        add_or_send_event("triggerRelease", self, payload)
        return self

    def trigger_attack_release(self, duration, time=None, velocity=1):
        """Emit a ``triggerAttackRelease`` event and return ``self``."""
        payload = {"duration": duration, "time": time, "velocity": velocity}
        add_or_send_event("triggerAttackRelease", self, payload)
        return self

    def _repr_keys(self):
        """Yield the parent's repr keys followed by the four ADSR trait names."""
        yield from super()._repr_keys()
        yield from ("attack", "decay", "sustain", "release")
class BokehFileViewer(Tool):
    """Tool that displays the events of a file in a bokeh application.

    The tool wires together an event source, an event seeker, the R1/DL0/DL1
    calibration steps and a ``BokehEventViewer``, and exposes a set of bokeh
    widgets to move between events, choose the telescope/channel and
    reconfigure the charge extractor and waveform cleaner.

    The ``event_index``, ``event_id``, ``telid``, ``channel`` and ``event``
    properties are the single entry points for changing the displayed
    content: their setters update the viewer and the matching widgets.
    """

    name = "BokehFileViewer"
    description = ("Interactively explore an event file using the bokeh "
                   "visualisation package")

    port = Int(5006, help="Port to open bokeh server onto").tag(config=True)
    disable_server = Bool(False, help="Do not start the bokeh server "
                                      "(useful for testing)").tag(config=True)

    # NOTE: evaluated at class-definition time; this changes the default
    # input url of EventSource itself, not only for this tool.
    default_url = get_dataset_path("gamma_test_large.simtel.gz")
    EventSource.input_url.default_value = default_url

    cleaner_product = tool_utils.enum_trait(
        WaveformCleaner,
        default='NullWaveformCleaner'
    )
    extractor_product = tool_utils.enum_trait(
        ChargeExtractor,
        default='NeighbourPeakIntegrator'
    )

    aliases = Dict(dict(
        port='BokehFileViewer.port',
        disable_server='BokehFileViewer.disable_server',
        f='EventSource.input_url',
        max_events='EventSource.max_events',
        extractor='BokehFileViewer.extractor_product',
        cleaner='BokehFileViewer.cleaner_product',
        simpleintegrator_t0='SimpleIntegrator.t0',
        window_width='WindowIntegrator.window_width',
        window_shift='WindowIntegrator.window_shift',
        sig_amp_cut_HG='PeakFindingIntegrator.sig_amp_cut_HG',
        sig_amp_cut_LG='PeakFindingIntegrator.sig_amp_cut_LG',
        lwt='NeighbourPeakIntegrator.lwt',
    ))

    classes = List(
        [
            EventSource,
            CameraDL1Calibrator,
        ]
        + tool_utils.classes_with_traits(WaveformCleaner)
        + tool_utils.classes_with_traits(ChargeExtractor)
        + tool_utils.classes_with_traits(CameraR1Calibrator)
    )

    def __init__(self, **kwargs):
        """Initialise all state to ``None``; real objects are built in `setup`."""
        super().__init__(**kwargs)
        # Backing fields of the public properties.
        self._event = None
        self._event_index = None
        self._event_id = None
        self._telid = None
        self._channel = None

        # Bokeh widgets (created by the ``create_*_widget`` methods).
        self.w_next_event = None
        self.w_previous_event = None
        self.w_event_index = None
        self.w_event_id = None
        self.w_goto_event_index = None
        self.w_goto_event_id = None
        self.w_telid = None
        self.w_channel = None
        self.w_dl1_dict = None
        self.wb_extractor = None
        self.layout = None

        # Processing components.
        self.reader = None
        self.seeker = None
        self.extractor = None
        self.cleaner = None
        self.r1 = None
        self.dl0 = None
        self.dl1 = None
        self.viewer = None

        # True while `on_dl1_widget_change` is running (re-entrancy guard).
        self._updating_dl1 = False

    def setup(self):
        """Build the processing components, the widgets and the page layout."""
        self.log_format = "%(levelname)s: %(message)s [%(name)s.%(funcName)s]"

        self.reader = EventSource.from_config(parent=self)
        self.seeker = EventSeeker(self.reader, parent=self)

        self.extractor = ChargeExtractor.from_name(
            self.extractor_product,
            parent=self
        )
        self.cleaner = WaveformCleaner.from_name(
            self.cleaner_product,
            parent=self
        )
        self.r1 = CameraR1Calibrator.from_eventsource(
            eventsource=self.reader,
            parent=self
        )
        self.dl0 = CameraDL0Reducer(parent=self)
        self.dl1 = CameraDL1Calibrator(
            extractor=self.extractor,
            cleaner=self.cleaner,
            parent=self
        )

        self.viewer = BokehEventViewer(parent=self)

        # Setup widgets
        self.viewer.create()
        self.viewer.enable_automatic_index_increment()
        self.create_previous_event_widget()
        self.create_next_event_widget()
        self.create_event_index_widget()
        self.create_goto_event_index_widget()
        self.create_event_id_widget()
        self.create_goto_event_id_widget()
        self.create_telid_widget()
        self.create_channel_widget()
        self.create_dl1_widgets()
        self.update_dl1_widget_values()

        # Setup layout
        self.layout = layout([
            [self.viewer.layout],
            [
                self.w_previous_event,
                self.w_next_event,
                self.w_goto_event_index,
                self.w_goto_event_id
            ],
            [self.w_event_index, self.w_event_id],
            [self.w_telid, self.w_channel],
            [self.wb_extractor]
        ])

    def start(self):
        """Load and display the first event (index 0)."""
        self.event_index = 0

    def finish(self):
        """Start the bokeh server and its IO loop unless `disable_server` is set.

        ``server.io_loop.start()`` is the last call made here, so this method
        only returns once that call returns.
        """
        if not self.disable_server:
            def modify_doc(doc):
                """Attach the layout, theme and HTML template to ``doc``."""
                doc.add_root(self.layout)
                doc.title = self.name

                # Theme and template files are looked up next to this module.
                directory = os.path.abspath(os.path.dirname(__file__))
                theme_path = os.path.join(directory, "theme.yaml")
                template_path = os.path.join(directory, "templates")
                doc.theme = Theme(filename=theme_path)
                env = jinja2.Environment(
                    loader=jinja2.FileSystemLoader(template_path)
                )
                doc.template = env.get_template('index.html')

            self.log.info('Opening Bokeh application on '
                          'http://localhost:{}/'.format(self.port))
            server = Server({'/': modify_doc}, num_procs=1, port=self.port)
            server.start()
            server.io_loop.add_callback(server.show, "/")
            server.io_loop.start()

    @property
    def event_index(self):
        """Index of the displayed event (``count`` of the current event)."""
        return self._event_index

    @event_index.setter
    def event_index(self, val):
        # An unknown index only produces a warning; the current event is kept.
        try:
            self.event = self.seeker[val]
        except IndexError:
            self.log.warning(f"Event Index {val} does not exist")

    @property
    def event_id(self):
        """``r0.event_id`` of the displayed event."""
        return self._event_id

    @event_id.setter
    def event_id(self, val):
        # The seeker is indexed with a string here, unlike in the
        # `event_index` setter which passes ``val`` through unchanged.
        try:
            self.event = self.seeker[str(val)]
        except IndexError:
            self.log.warning(f"Event ID {val} does not exist")

    @property
    def telid(self):
        """Identifier of the telescope currently displayed."""
        return self._telid

    @telid.setter
    def telid(self, val):
        # Changing telescope always resets the channel to 0 first.
        self.channel = 0
        tels = list(self.event.r0.tels_with_data)
        # Fall back to the first telescope with data if ``val`` has none.
        if val not in tels:
            val = tels[0]
        self._telid = val
        self.viewer.telid = val
        self.update_telid_widget()

    @property
    def channel(self):
        """Channel currently displayed."""
        return self._channel

    @channel.setter
    def channel(self, val):
        self._channel = val
        self.viewer.channel = val
        self.update_channel_widget()

    @property
    def event(self):
        """Event currently displayed."""
        return self._event

    @event.setter
    def event(self, val):
        # Calibrate
        self.r1.calibrate(val)
        self.dl0.reduce(val)
        self.dl1.calibrate(val)

        self._event = val

        self.viewer.event = val

        self._event_index = val.count
        self._event_id = val.r0.event_id
        self.update_event_index_widget()
        self.update_event_id_widget()

        # Mirror the telescope/channel held by the viewer after it received
        # the new event, then refresh the corresponding widgets.
        self._telid = self.viewer.telid
        self.update_telid_widget()

        self._channel = self.viewer.channel
        self.update_channel_widget()

    def update_dl1_calibrator(self, extractor=None, cleaner=None):
        """
        Recreate the dl1 calibrator with the specified extractor and cleaner

        The current event is recalibrated with the new calibrator and the
        viewer is refreshed.

        Parameters
        ----------
        extractor : ctapipe.image.charge_extractors.ChargeExtractor
            If None, the extractor of the current dl1 calibrator is reused.
        cleaner : ctapipe.image.waveform_cleaning.WaveformCleaner
            If None, the cleaner of the current dl1 calibrator is reused.
        """
        if extractor is None:
            extractor = self.dl1.extractor
        if cleaner is None:
            cleaner = self.dl1.cleaner

        self.extractor = extractor
        self.cleaner = cleaner

        self.dl1 = CameraDL1Calibrator(
            extractor=self.extractor,
            cleaner=self.cleaner,
            parent=self
        )
        self.dl1.calibrate(self.event)
        self.viewer.refresh()

    def create_next_event_widget(self):
        """Create the ">" button that moves to the next event."""
        self.w_next_event = Button(label=">", button_type="default", width=50)
        self.w_next_event.on_click(self.on_next_event_widget_click)

    def on_next_event_widget_click(self):
        """Increase `event_index` by one."""
        self.event_index += 1

    def create_previous_event_widget(self):
        """Create the "<" button that moves to the previous event."""
        self.w_previous_event = Button(
            label="<",
            button_type="default",
            width=50
        )
        self.w_previous_event.on_click(self.on_previous_event_widget_click)

    def on_previous_event_widget_click(self):
        """Decrease `event_index` by one."""
        self.event_index -= 1

    def create_event_index_widget(self):
        """Create the text input holding the event index."""
        self.w_event_index = TextInput(title="Event Index:", value='')

    def update_event_index_widget(self):
        """Write the current `event_index` into its text input, if created."""
        if self.w_event_index:
            self.w_event_index.value = str(self.event_index)

    def create_event_id_widget(self):
        """Create the text input holding the event id."""
        self.w_event_id = TextInput(title="Event ID:", value='')

    def update_event_id_widget(self):
        """Write the current `event_id` into its text input, if created."""
        if self.w_event_id:
            self.w_event_id.value = str(self.event_id)

    def create_goto_event_index_widget(self):
        """Create the button that jumps to the index typed in the text input."""
        self.w_goto_event_index = Button(
            label="GOTO Index",
            button_type="default",
            width=100
        )
        self.w_goto_event_index.on_click(self.on_goto_event_index_widget_click)

    def on_goto_event_index_widget_click(self):
        """Set `event_index` from the integer typed in the index text input."""
        self.event_index = int(self.w_event_index.value)

    def create_goto_event_id_widget(self):
        """Create the button that jumps to the id typed in the text input."""
        self.w_goto_event_id = Button(
            label="GOTO ID",
            button_type="default",
            width=70
        )
        self.w_goto_event_id.on_click(self.on_goto_event_id_widget_click)

    def on_goto_event_id_widget_click(self):
        """Set `event_id` from the integer typed in the id text input."""
        self.event_id = int(self.w_event_id.value)

    def create_telid_widget(self):
        """Create the telescope selection drop-down (initially empty)."""
        self.w_telid = Select(title="Telescope:", value="", options=[])
        self.w_telid.on_change('value', self.on_telid_widget_change)

    def update_telid_widget(self):
        """List the telescopes with data in the drop-down and select `telid`."""
        if self.w_telid:
            tels = [str(t) for t in self.event.r0.tels_with_data]
            self.w_telid.options = tels
            self.w_telid.value = str(self.telid)

    def on_telid_widget_change(self, _, __, ___):
        """Apply the drop-down selection to `telid` when it differs."""
        # The comparison avoids re-running the setter when the widget was
        # changed by `update_telid_widget` itself.
        if self.telid != int(self.w_telid.value):
            self.telid = int(self.w_telid.value)

    def create_channel_widget(self):
        """Create the channel selection drop-down (initially empty)."""
        self.w_channel = Select(title="Channel:", value="", options=[])
        self.w_channel.on_change('value', self.on_channel_widget_change)

    def update_channel_widget(self):
        """List the available channels in the drop-down and select `channel`."""
        if self.w_channel:
            try:
                n_chan = self.event.r0.tel[self.telid].waveform.shape[0]
            except AttributeError:
                # No waveform shape available (e.g. no event loaded yet):
                # offer a single channel.
                n_chan = 1
            channels = [str(c) for c in range(n_chan)]
            self.w_channel.options = channels
            self.w_channel.value = str(self.channel)

    def on_channel_widget_change(self, _, __, ___):
        """Apply the drop-down selection to `channel` when it differs."""
        if self.channel != int(self.w_channel.value):
            self.channel = int(self.w_channel.value)

    def create_dl1_widgets(self):
        """Create the cleaner/extractor configuration widgets and their box.

        The keys of ``w_dl1_dict`` are later used verbatim as command-line
        option names by `on_dl1_widget_change`.
        """
        self.w_dl1_dict = dict(
            cleaner=Select(title="Cleaner:", value='', width=5,
                           options=BokehFileViewer.cleaner_product.values),
            extractor=Select(title="Extractor:", value='', width=5,
                             options=BokehFileViewer.extractor_product.values),
            extractor_t0=TextInput(title="T0:", value=''),
            extractor_window_width=TextInput(title="Window Width:", value=''),
            extractor_window_shift=TextInput(title="Window Shift:", value=''),
            extractor_sig_amp_cut_HG=TextInput(title="Significant Amplitude "
                                                     "Cut (HG):", value=''),
            extractor_sig_amp_cut_LG=TextInput(title="Significant Amplitude "
                                                     "Cut (LG):", value=''),
            extractor_lwt=TextInput(title="Local Pixel Weight:", value=''))

        for val in self.w_dl1_dict.values():
            val.on_change('value', self.on_dl1_widget_change)

        self.wb_extractor = widgetbox(
            PreText(text="Charge Extractor Configuration"),
            self.w_dl1_dict['cleaner'],
            self.w_dl1_dict['extractor'],
            self.w_dl1_dict['extractor_t0'],
            self.w_dl1_dict['extractor_window_width'],
            self.w_dl1_dict['extractor_window_shift'],
            self.w_dl1_dict['extractor_sig_amp_cut_HG'],
            self.w_dl1_dict['extractor_sig_amp_cut_LG'],
            self.w_dl1_dict['extractor_lwt'])

    def update_dl1_widget_values(self):
        """Fill the dl1 widgets from the current extractor and cleaner.

        The ``extractor``/``cleaner`` widgets receive the class name of the
        component; every ``extractor_<attr>``/``cleaner_<attr>`` widget
        receives ``str(component.<attr>)`` or an empty string when the
        component has no such attribute.
        """
        if self.w_dl1_dict:
            for key, val in self.w_dl1_dict.items():
                if 'extractor' in key:
                    if key == 'extractor':
                        val.value = self.extractor.__class__.__name__
                    else:
                        key = key.replace("extractor_", "")
                        try:
                            val.value = str(getattr(self.extractor, key))
                        except AttributeError:
                            val.value = ''
                elif 'cleaner' in key:
                    if key == 'cleaner':
                        val.value = self.cleaner.__class__.__name__
                    else:
                        key = key.replace("cleaner_", "")
                        try:
                            val.value = str(getattr(self.cleaner, key))
                        except AttributeError:
                            val.value = ''

    def on_dl1_widget_change(self, _, __, ___):
        """Rebuild the extractor, cleaner and dl1 calibrator from the widgets.

        Every non-empty widget value is turned into a ``--<key> <value>``
        pair and handed to ``parse_command_line``; the extractor and cleaner
        are then recreated and the widgets refreshed.

        Nothing happens when no event is loaded or when an update is
        already in progress.
        """
        # `_updating_dl1` guards against re-entrant calls: the widget values
        # written by `update_dl1_widget_values` below presumably fire this
        # same callback again.
        if not self.event or self._updating_dl1:
            return

        self._updating_dl1 = True
        try:
            cmdline = []
            for key, val in self.w_dl1_dict.items():
                if val.value:
                    cmdline.append(f'--{key}')
                    cmdline.append(val.value)
            self.parse_command_line(cmdline)
            extractor = ChargeExtractor.from_name(
                self.extractor_product,
                parent=self
            )
            cleaner = WaveformCleaner.from_name(
                self.cleaner_product,
                parent=self
            )
            self.update_dl1_calibrator(extractor, cleaner)
            self.update_dl1_widget_values()
        finally:
            # Always release the guard: if it stayed set after a failure
            # (e.g. an invalid value typed in a widget), every subsequent
            # widget change would be silently ignored.
            self._updating_dl1 = False
class DockerSpawner(Spawner): """A Spawner for JupyterHub that runs each user's server in a separate docker container""" _executor = None @property def executor(self): """single global executor""" cls = self.__class__ if cls._executor is None: cls._executor = ThreadPoolExecutor(1) return cls._executor _client = None @property def client(self): """single global client instance""" cls = self.__class__ if cls._client is None: kwargs = {"version": "auto"} if self.tls_config: kwargs["tls"] = docker.tls.TLSConfig(**self.tls_config) kwargs.update(kwargs_from_env()) kwargs.update(self.client_kwargs) client = docker.APIClient(**kwargs) cls._client = client return cls._client # notice when user has set the command # default command is that of the container, # but user can override it via config _user_set_cmd = False @observe("cmd") def _cmd_changed(self, change): self._user_set_cmd = True object_id = Unicode() # the type of object we create object_type = "container" # the field containing the object id object_id_key = "Id" @property def container_id(self): """alias for object_id""" return self.object_id @property def container_name(self): """alias for object_name""" return self.object_name # deprecate misleading container_ip, since # it is not the ip in the container, # but the host ip of the port forwarded to the container # when use_internal_ip is False container_ip = Unicode("127.0.0.1", config=True) @observe("container_ip") def _container_ip_deprecated(self, change): self.log.warning( "DockerSpawner.container_ip is deprecated in dockerspawner-0.9." " Use DockerSpawner.host_ip to specify the host ip that is forwarded to the container" ) self.host_ip = change.new host_ip = Unicode( "127.0.0.1", help= """The ip address on the host on which to expose the container's port Typically 127.0.0.1, but can be public interfaces as well in cases where the Hub and/or proxy are on different machines from the user containers. Only used when use_internal_ip = False. 
""", config=True, ) @default('host_ip') def _default_host_ip(self): docker_host = os.getenv('DOCKER_HOST') if docker_host: urlinfo = urlparse(docker_host) if urlinfo.scheme == 'tcp': return urlinfo.hostname return '127.0.0.1' # unlike container_ip, container_port is the internal port # on which the server is bound. container_port = Int(8888, min=1, max=65535, config=True) @observe("container_port") def _container_port_changed(self, change): self.log.warning( "DockerSpawner.container_port is deprecated in dockerspawner 0.9." " Use DockerSpawner.port") self.port = change.new # fix default port to 8888, used in the container @default("port") def _port_default(self): return 8888 # default to listening on all-interfaces in the container @default("ip") def _ip_default(self): return "0.0.0.0" container_image = Unicode("jupyterhub/singleuser:%s" % _jupyterhub_xy, config=True) @observe("container_image") def _container_image_changed(self, change): self.log.warning( "DockerSpawner.container_image is deprecated in dockerspawner 0.9." " Use DockerSpawner.image") self.image = change.new image = Unicode( "jupyterhub/singleuser:%s" % _jupyterhub_xy, config=True, help="""The image to use for single-user servers. This image should have the same version of jupyterhub as the Hub itself installed. If the default command of the image does not launch jupyterhub-singleuser, set `c.Spawner.cmd` to launch jupyterhub-singleuser, e.g. Any of the jupyter docker-stacks should work without additional config, as long as the version of jupyterhub in the image is compatible. """, ) image_whitelist = Union( [Any(), Dict(), List()], default_value={}, config=True, help=""" List or dict of images that users can run. If specified, users will be presented with a form from which they can select an image to run. If a dictionary, the keys will be the options presented to users and the values the actual images that will be launched. 
If a list, will be cast to a dictionary where keys and values are the same (i.e. a shortcut for presenting the actual images directly to users). If a callable, will be called with the Spawner instance as its only argument. The user is accessible as spawner.user. The callable should return a dict or list as above. """, ) @validate('image_whitelist') def _image_whitelist_dict(self, proposal): """cast image_whitelist to a dict If passing a list, cast it to a {item:item} dict where the keys and values are the same. """ whitelist = proposal.value if isinstance(whitelist, list): whitelist = {item: item for item in whitelist} return whitelist def _get_image_whitelist(self): """Evaluate image_whitelist callable Or return the whitelist as-is if it's already a dict """ if callable(self.image_whitelist): whitelist = self.image_whitelist(self) if not isinstance(whitelist, dict): # always return a dict whitelist = {item: item for item in whitelist} return whitelist return self.image_whitelist @default('options_form') def _default_options_form(self): image_whitelist = self._get_image_whitelist() if len(image_whitelist) <= 1: # default form only when there are images to choose from return '' # form derived from wrapspawner.ProfileSpawner option_t = '<option value="{image}" {selected}>{image}</option>' options = [ option_t.format(image=image, selected='selected' if image == self.image else '') for image in image_whitelist ] return """ <label for="image">Select an image:</label> <select class="form-control" name="image" required autofocus> {options} </select> """.format(options=options) def options_from_form(self, formdata): """Turn options formdata into user_options""" options = {} if 'image' in formdata: options['image'] = formdata['image'][0] return options pull_policy = CaselessStrEnum( ["always", "ifnotpresent", "never"], default_value="ifnotpresent", config=True, help="""The policy for pulling the user docker image. 
Choices: - ifnotpresent: pull if the image is not already present (default) - always: always pull the image to check for updates, even if it is present - never: never perform a pull """) container_prefix = Unicode(config=True, help="DEPRECATED in 0.10. Use prefix") container_name_template = Unicode( config=True, help="DEPRECATED in 0.10. Use name_template") @observe("container_name_template", "container_prefix") def _deprecate_container_alias(self, change): new_name = change.name[len("container_"):] setattr(self, new_name, change.new) prefix = Unicode( "jupyter", config=True, help=dedent(""" Prefix for container names. See name_template for full container name for a particular user's server. """), ) name_template = Unicode( "{prefix}-{username}", config=True, help=dedent(""" Name of the container or service: with {username}, {imagename}, {prefix} replacements. {raw_username} can be used for the original, not escaped username (may contain uppercase, special characters). The default name_template is <prefix>-<username> for backward compatibility. """), ) client_kwargs = Dict( config=True, help= "Extra keyword arguments to pass to the docker.Client constructor.", ) volumes = Dict( config=True, help=dedent(""" Map from host file/directory to container (guest) file/directory mount point and (optionally) a mode. When specifying the guest mount point (bind) for the volume, you may use a dict or str. If a str, then the volume will default to a read-write (mode="rw"). With a dict, the bind is identified by "bind" and the "mode" may be one of "rw" (default), "ro" (read-only), "z" (public/shared SELinux volume label), and "Z" (private/unshared SELinux volume label). If format_volume_name is not set, default_format_volume_name is used for naming volumes. In this case, if you use {username} in either the host or guest file/directory path, it will be replaced with the current user's name. 
"""), ) move_certs_image = Unicode( "busybox:1.30.1", config=True, help="""The image used to stage internal SSL certificates. Busybox is used because we just need an empty container that waits while we stage files into the volume via .put_archive. """) @gen.coroutine def move_certs(self, paths): self.log.info("Staging internal ssl certs for %s", self._log_name) yield self.pull_image(self.move_certs_image) # create the volume volume_name = self.format_volume_name(self.certs_volume_name, self) # create volume passes even if it already exists self.log.info("Creating ssl volume %s for %s", volume_name, self._log_name) yield self.docker('create_volume', volume_name) # create a tar archive of the internal cert files # docker.put_archive takes a tarfile and a running container # and unpacks the archive into the container nb_paths = {} tar_buf = BytesIO() archive = TarFile(fileobj=tar_buf, mode='w') for key, hub_path in paths.items(): fname = os.path.basename(hub_path) nb_paths[key] = '/certs/' + fname with open(hub_path, 'rb') as f: content = f.read() tarinfo = TarInfo(name=fname) tarinfo.size = len(content) tarinfo.mtime = os.stat(hub_path).st_mtime tarinfo.mode = 0o644 archive.addfile(tarinfo, BytesIO(content)) archive.close() tar_buf.seek(0) # run a container to stage the certs, # mounting the volume at /certs/ host_config = self.client.create_host_config(binds={ volume_name: { "bind": "/certs", "mode": "rw" }, }, ) container = yield self.docker( 'create_container', self.move_certs_image, volumes=["/certs"], host_config=host_config, ) container_id = container['Id'] self.log.debug( "Container %s is creating ssl certs for %s", container_id[:12], self._log_name, ) # start the container yield self.docker('start', container_id) # stage the archive to the container try: yield self.docker( 'put_archive', container=container_id, path='/certs', data=tar_buf, ) finally: yield self.docker('remove_container', container_id) return nb_paths certs_volume_name = 
Unicode("{prefix}ssl-{username}", config=True, help="""Volume name The same string-templating applies to this as other volume names. """) read_only_volumes = Dict( config=True, help=dedent(""" Map from host file/directory to container file/directory. Volumes specified here will be read-only in the container. If format_volume_name is not set, default_format_volume_name is used for naming volumes. In this case, if you use {username} in either the host or guest file/directory path, it will be replaced with the current user's name. """), ) format_volume_name = Any( help= """Any callable that accepts a string template and a DockerSpawner instance as parameters in that order and returns a string. Reusable implementations should go in dockerspawner.VolumeNamingStrategy, tests should go in ... """).tag(config=True) @default("format_volume_name") def _get_default_format_volume_name(self): return default_format_volume_name use_docker_client_env = Bool( True, config=True, help="DEPRECATED. Docker env variables are always used if present.", ) @observe("use_docker_client_env") def _client_env_changed(self): self.log.warning( "DockerSpawner.use_docker_client_env is deprecated and ignored." " Docker environment variables are always used if defined.") tls_config = Dict( config=True, help="""Arguments to pass to docker TLS configuration. See docker.client.TLSConfig constructor for options. """, ) tls = tls_verify = tls_ca = tls_cert = tls_key = tls_assert_hostname = Any( config=True, help= """DEPRECATED. Use DockerSpawner.tls_config dict to set any TLS options.""", ) @observe("tls", "tls_verify", "tls_ca", "tls_cert", "tls_key", "tls_assert_hostname") def _tls_changed(self, change): self.log.warning( "%s config ignored, use %s.tls_config dict to set full TLS configuration.", change.name, self.__class__.__name__, ) remove_containers = Bool( False, config=True, help="DEPRECATED in DockerSpawner 0.10. 
Use .remove") @observe("remove_containers") def _deprecate_remove_containers(self, change): # preserve remove_containers alias to .remove self.remove = change.new remove = Bool( False, config=True, help=""" If True, delete containers when servers are stopped. This will destroy any data in the container not stored in mounted volumes. """, ) @property def will_resume(self): # indicate that we will resume, # so JupyterHub >= 0.7.1 won't cleanup our API token return not self.remove extra_create_kwargs = Dict( config=True, help="Additional args to pass for container create") extra_host_config = Dict( config=True, help="Additional args to create_host_config for container create") _docker_safe_chars = set(string.ascii_letters + string.digits + "-") _docker_escape_char = "_" hub_ip_connect = Unicode( config=True, help=dedent(""" If set, DockerSpawner will configure the containers to use the specified IP to connect the hub api. This is useful when the hub_api is bound to listen on all ports or is running inside of a container. """), ) @observe("hub_ip_connect") def _ip_connect_changed(self, change): if jupyterhub.version_info >= (0, 8): warnings.warn( "DockerSpawner.hub_ip_connect is no longer needed with JupyterHub 0.8." " Use JupyterHub.hub_connect_ip instead.", DeprecationWarning, ) use_internal_ip = Bool( False, config=True, help=dedent(""" Enable the usage of the internal docker ip. This is useful if you are running jupyterhub (as a container) and the user containers within the same docker network. E.g. by mounting the docker socket of the host into the jupyterhub container. Default is True if using a docker network, False if bridge or host networking is used. 
"""), ) @default("use_internal_ip") def _default_use_ip(self): # setting network_name to something other than bridge or host implies use_internal_ip if self.network_name not in {"bridge", "host"}: return True else: return False use_internal_hostname = Bool( False, config=True, help=dedent(""" Use the docker hostname for connecting. instead of an IP address. This should work in general when using docker networks, and must be used when internal_ssl is enabled. It is enabled by default if internal_ssl is enabled. """), ) @default("use_internal_hostname") def _default_use_hostname(self): # FIXME: replace getattr with self.internal_ssl # when minimum jupyterhub is 1.0 return getattr(self, 'internal_ssl', False) links = Dict( config=True, help=dedent(""" Specify docker link mapping to add to the container, e.g. links = {'jupyterhub': 'jupyterhub'} If the Hub is running in a Docker container, this can simplify routing because all traffic will be using docker hostnames. """), ) network_name = Unicode( "bridge", config=True, help=dedent(""" Run the containers on this docker network. If it is an internal docker network, the Hub should be on the same network, as internal docker IP addresses will be used. For bridge networking, external ports will be bound. """), ) @property def tls_client(self): """A tuple consisting of the TLS client certificate and key if they have been provided, otherwise None. """ if self.tls_cert and self.tls_key: return (self.tls_cert, self.tls_key) return None @property def volume_mount_points(self): """ Volumes are declared in docker-py in two stages. First, you declare all the locations where you're going to mount volumes when you call create_container. Returns a sorted list of all the values in self.volumes or self.read_only_volumes. """ return sorted([value["bind"] for value in self.volume_binds.values()]) @property def volume_binds(self): """ The second half of declaring a volume with docker-py happens when you actually call start(). 
The required format is a dict of dicts that looks like: { host_location: {'bind': container_location, 'mode': 'rw'} } mode may be 'ro', 'rw', 'z', or 'Z'. """ binds = self._volumes_to_binds(self.volumes, {}) read_only_volumes = {} # FIXME: replace getattr with self.internal_ssl # when minimum jupyterhub is 1.0 if getattr(self, 'internal_ssl', False): # add SSL volume as read-only read_only_volumes[self.certs_volume_name] = '/certs' read_only_volumes.update(self.read_only_volumes) return self._volumes_to_binds(read_only_volumes, binds, mode="ro") _escaped_name = None @property def escaped_name(self): """Escape the username so it's safe for docker objects""" if self._escaped_name is None: self._escaped_name = self._escape(self.user.name) return self._escaped_name def _escape(self, s): """Escape a string to docker-safe characters""" return escape( s, safe=self._docker_safe_chars, escape_char=self._docker_escape_char, ) object_id = Unicode(allow_none=True) def template_namespace(self): escaped_image = self.image.replace("/", "_") server_name = getattr(self, "name", "") return { "username": self.escaped_name, "safe_username": self.user.name, "raw_username": self.user.name, "imagename": escaped_image, "servername": server_name, "prefix": self.prefix, } @property def object_name(self): """Render the name of our container/service using name_template""" return self.name_template.format(**self.template_namespace()) def load_state(self, state): super(DockerSpawner, self).load_state(state) if "container_id" in state: # backward-compatibility for dockerspawner < 0.10 self.object_id = state.get("container_id") else: self.object_id = state.get("object_id", "") def get_state(self): state = super(DockerSpawner, self).get_state() if self.object_id: state["object_id"] = self.object_id return state def _public_hub_api_url(self): proto, path = self.hub.api_url.split("://", 1) ip, rest = path.split(":", 1) return "{proto}://{ip}:{rest}".format(proto=proto, ip=self.hub_ip_connect, 
rest=rest) def _env_keep_default(self): """Don't inherit any env from the parent process""" return [] def get_args(self): args = super().get_args() if self.hub_ip_connect: # JupyterHub 0.7 specifies --hub-api-url # on the command-line, which is hard to update for idx, arg in enumerate(list(args)): if arg.startswith("--hub-api-url="): args.pop(idx) break args.append("--hub-api-url=%s" % self._public_hub_api_url()) return args def _docker(self, method, *args, **kwargs): """wrapper for calling docker methods to be passed to ThreadPoolExecutor """ m = getattr(self.client, method) return m(*args, **kwargs) def docker(self, method, *args, **kwargs): """Call a docker method in a background thread returns a Future """ return self.executor.submit(self._docker, method, *args, **kwargs) @gen.coroutine def poll(self): """Check for my id in `docker ps`""" container = yield self.get_object() if not container: self.log.warning("Container not found: %s", self.container_name) return 0 container_state = container["State"] self.log.debug("Container %s status: %s", self.container_id[:7], pformat(container_state)) if container_state["Running"]: return None else: return ("ExitCode={ExitCode}, " "Error='{Error}', " "FinishedAt={FinishedAt}".format(**container_state)) @gen.coroutine def get_object(self): self.log.debug("Getting container '%s'", self.object_name) try: obj = yield self.docker("inspect_%s" % self.object_type, self.object_name) self.object_id = obj[self.object_id_key] except APIError as e: if e.response.status_code == 404: self.log.info("%s '%s' is gone", self.object_type.title(), self.object_name) obj = None # my container is gone, forget my id self.object_id = "" elif e.response.status_code == 500: self.log.info( "%s '%s' is on unhealthy node", self.object_type.title(), self.object_name, ) obj = None # my container is unhealthy, forget my id self.object_id = "" else: raise return obj @gen.coroutine def get_command(self): """Get the command to run (full command + args)""" if 
self._user_set_cmd: cmd = self.cmd else: image_info = yield self.docker("inspect_image", self.image) cmd = image_info["Config"]["Cmd"] return cmd + self.get_args() @gen.coroutine def remove_object(self): self.log.info("Removing %s %s", self.object_type, self.object_id) # remove the container, as well as any associated volumes try: yield self.docker("remove_" + self.object_type, self.object_id, v=True) except docker.errors.APIError as e: if e.status_code == 409: self.log.debug("Already removing %s: %s", self.object_type, self.object_id) else: raise @gen.coroutine def check_image_whitelist(self, image): image_whitelist = self._get_image_whitelist() if not image_whitelist: return image if image not in image_whitelist: raise web.HTTPError( 400, "Image %s not in whitelist: %s" % (image, ', '.join(image_whitelist)), ) # resolve image alias to actual image name return image_whitelist[image] @default('ssl_alt_names') def _get_ssl_alt_names(self): return ['DNS:' + self.internal_hostname] @gen.coroutine def create_object(self): """Create the container/service object""" create_kwargs = dict( image=self.image, environment=self.get_env(), volumes=self.volume_mount_points, name=self.container_name, command=(yield self.get_command()), ) # ensure internal port is exposed create_kwargs["ports"] = {"%i/tcp" % self.port: None} create_kwargs.update(self.extra_create_kwargs) # build the dictionary of keyword arguments for host_config host_config = dict(binds=self.volume_binds, links=self.links) if getattr(self, "mem_limit", None) is not None: # If jupyterhub version > 0.7, mem_limit is a traitlet that can # be directly configured. If so, use it to set mem_limit. 
# this will still be overriden by extra_host_config host_config["mem_limit"] = self.mem_limit if not self.use_internal_ip: host_config["port_bindings"] = {self.port: (self.host_ip, )} host_config.update(self.extra_host_config) host_config.setdefault("network_mode", self.network_name) self.log.debug("Starting host with config: %s", host_config) host_config = self.client.create_host_config(**host_config) create_kwargs.setdefault("host_config", {}).update(host_config) # create the container obj = yield self.docker("create_container", **create_kwargs) return obj @gen.coroutine def start_object(self): """Actually start the container/service e.g. calling `docker start` """ return self.docker("start", self.container_id) @gen.coroutine def stop_object(self): """Stop the container/service e.g. calling `docker stop`. Does not remove the container. """ return self.docker("stop", self.container_id) @gen.coroutine def pull_image(self, image): """Pull the image, if needed - pulls it unconditionally if pull_policy == 'always' - otherwise, checks if it exists, and - raises if pull_policy == 'never' - pulls if pull_policy == 'ifnotpresent' """ # docker wants to split repo:tag if ':' in image: repo, tag = image.split(':', 1) else: repo = image tag = 'latest' if self.pull_policy.lower() == 'always': # always pull self.log.info("pulling %s", image) yield self.docker('pull', repo, tag) # done return try: # check if the image is present yield self.docker('inspect_image', image) except docker.errors.NotFound: if self.pull_policy == "never": # never pull, raise because there is no such image raise elif self.pull_policy == "ifnotpresent": # not present, pull it for the first time self.log.info("pulling image %s", image) yield self.docker('pull', repo, tag) @gen.coroutine def start(self, image=None, extra_create_kwargs=None, extra_host_config=None): """Start the single-user server in a docker container. Additional arguments to create/host config/etc. 
can be specified via .extra_create_kwargs and .extra_host_config attributes. If the container exists and `c.DockerSpawner.remove` is true, then the container is removed first. Otherwise, the existing containers will be restarted. """ if image: self.log.warning("Specifying image via .start args is deprecated") self.image = image if extra_create_kwargs: self.log.warning( "Specifying extra_create_kwargs via .start args is deprecated") self.extra_create_kwargs.update(extra_create_kwargs) if extra_host_config: self.log.warning( "Specifying extra_host_config via .start args is deprecated") self.extra_host_config.update(extra_host_config) # image priority: # 1. user options (from spawn options form) # 2. self.image from config image_option = self.user_options.get('image') if image_option: # save choice in self.image self.image = yield self.check_image_whitelist(image_option) image = self.image yield self.pull_image(image) obj = yield self.get_object() if obj and self.remove: self.log.warning( "Removing %s that should have been cleaned up: %s (id: %s)", self.object_type, self.object_name, self.object_id[:7], ) yield self.remove_object() obj = None if obj is None: obj = yield self.create_object() self.object_id = obj[self.object_id_key] self.log.info( "Created %s %s (id: %s) from image %s", self.object_type, self.object_name, self.object_id[:7], self.image, ) else: self.log.info( "Found existing %s %s (id: %s)", self.object_type, self.object_name, self.object_id[:7], ) # Handle re-using API token. 
# Get the API token from the environment variables # of the running container: for line in obj["Config"]["Env"]: if line.startswith( ("JPY_API_TOKEN=", "JUPYTERHUB_API_TOKEN=")): self.api_token = line.split("=", 1)[1] break # TODO: handle unpause self.log.info( "Starting %s %s (id: %s)", self.object_type, self.object_name, self.container_id[:7], ) # start the container yield self.start_object() ip, port = yield self.get_ip_and_port() if jupyterhub.version_info < (0, 7): # store on user for pre-jupyterhub-0.7: self.user.server.ip = ip self.user.server.port = port # jupyterhub 0.7 prefers returning ip, port: return (ip, port) @property def internal_hostname(self): """Return our hostname used with internal SSL """ return self.container_name @gen.coroutine def get_ip_and_port(self): """Queries Docker daemon for container's IP and port. If you are using network_mode=host, you will need to override this method as follows:: @gen.coroutine def get_ip_and_port(self): return self.host_ip, self.port You will need to make sure host_ip and port are correct, which depends on the route to the container and the port it opens. """ if self.use_internal_hostname: # internal ssl uses hostnames, # required for domain-name matching with internal SSL # TODO: should we always do this? # are there any cases where internal_ip works # and internal_hostname doesn't? 
ip = self.internal_hostname port = self.port elif self.use_internal_ip: resp = yield self.docker("inspect_container", self.container_id) network_settings = resp["NetworkSettings"] if "Networks" in network_settings: ip = self.get_network_ip(network_settings) else: # Fallback for old versions of docker (<1.9) without network management ip = network_settings["IPAddress"] port = self.port else: resp = yield self.docker("port", self.container_id, self.port) if resp is None: raise RuntimeError("Failed to get port info for %s" % self.container_id) ip = resp[0]["HostIp"] port = int(resp[0]["HostPort"]) if ip == "0.0.0.0": ip = urlparse(self.client.base_url).hostname if ip == "localnpipe": ip = "localhost" return ip, port def get_network_ip(self, network_settings): networks = network_settings["Networks"] if self.network_name not in networks: raise Exception( "Unknown docker network '{network}'." " Did you create it with `docker network create <name>`?". format(network=self.network_name)) network = networks[self.network_name] ip = network["IPAddress"] return ip @gen.coroutine def stop(self, now=False): """Stop the container Will remove the container if `c.DockerSpawner.remove` is `True`. Consider using pause/unpause when docker-py adds support. """ self.log.info( "Stopping %s %s (id: %s)", self.object_type, self.object_name, self.object_id[:7], ) yield self.stop_object() if self.remove: yield self.remove_object() self.clear_state() def _volumes_to_binds(self, volumes, binds, mode="rw"): """Extract the volume mount points from volumes property. Returns a dict of dict entries of the form:: {'/host/dir': {'bind': '/guest/dir': 'mode': 'rw'}} """ def _fmt(v): return self.format_volume_name(v, self) for k, v in volumes.items(): m = mode if isinstance(v, dict): if "mode" in v: m = v["mode"] v = v["bind"] binds[_fmt(k)] = {"bind": _fmt(v), "mode": m} return binds
class Axis(BaseAxis):

    """A line axis.

    A line axis is the visual representation of a numerical or date scale.

    Attributes
    ----------
    icon: string (class-level attribute)
        The font-awesome icon name for this object.
    axis_types: dict (class-level attribute)
        A registry of existing axis types.
    orientation: {'horizontal', 'vertical'} (default: 'horizontal')
        The orientation of the axis, either vertical or horizontal
    side: {'bottom', 'top', 'left', 'right'} or None (default: None)
        The side of the axis, either bottom, top, left or right.
    label: string
        The axis label
    tick_format: string or None (default: None)
        The tick format for the axis.
    scale: Scale
        The scale represented by the axis
    num_ticks: int or None (default: None)
        If tick_values is None, number of ticks
    tick_values: numpy.ndarray or None
        Tick values for the axis
    offset: dict
        Contains a scale and a value {'scale': scale or None,
        'value': value of the offset}
        If offset['scale'] is None, the corresponding figure scale is used
        instead.
    label_location: {'middle', 'start', 'end'} (default: 'middle')
        The location of the label along the axis, one of 'start', 'end' or
        'middle'
    label_color: Color or None (default: None)
        The color of the axis label
    grid_lines: {'none', 'solid', 'dashed'} (default: 'none')
        The display of the grid lines
    grid_color: Color or None (default: None)
        The color of the grid lines
    color: Color or None (default: None)
        The color of the line
    label_offset: string or None (default: None)
        Label displacement from the axis line. Units allowed are 'em', 'px'
        and 'ex'. Positive values are away from the figure and negative
        values are towards the figure with respect to the axis line.
    visible: bool (default: True)
        A visibility toggle for the axis
    """
    icon = 'fa-arrows'

    # --- synced, user-facing traits ---
    orientation = Enum(
        ['horizontal', 'vertical'],
        default_value='horizontal',
        sync=True,
    )
    side = Enum(
        ['bottom', 'top', 'left', 'right'],
        allow_none=True,
        default_value=None,
        sync=True,
    )
    label = Unicode(sync=True)
    grid_lines = Enum(
        ['none', 'solid', 'dashed'],
        default_value='none',
        sync=True,
    )
    tick_format = Unicode(default_value=None, allow_none=True, sync=True)
    scale = Instance(Scale, sync=True, **widget_serialization)
    num_ticks = Int(default_value=None, sync=True, allow_none=True)
    tick_values = NdArray(sync=True, allow_none=True)
    offset = Dict(sync=True, **widget_serialization)
    label_location = Enum(
        ['middle', 'start', 'end'],
        default_value='middle',
        sync=True,
    )
    label_color = Color(None, sync=True, allow_none=True)
    grid_color = Color(None, sync=True, allow_none=True)
    color = Color(None, sync=True, allow_none=True)
    label_offset = Unicode(default_value=None, sync=True, allow_none=True)
    visible = Bool(default_value=True, sync=True)

    # --- front-end view/model wiring ---
    _view_name = Unicode(default_value='Axis', sync=True)
    _view_module = Unicode(default_value='nbextensions/bqplot/Axis', sync=True)
    _model_name = Unicode(default_value='AxisModel', sync=True)
    _model_module = Unicode(default_value='nbextensions/bqplot/AxisModel',
                            sync=True)

    # We cannot display an axis outside of a figure.
    _ipython_display_ = None
class Process(HasTraits):
    """Top-level workflow (process) information."""

    # kind of process; falls back to "Other" when not specified
    type_ = Enum(
        ["Observation", "Simulation", "Other"],
        default_value="Other",
    )
    # free-form refinement of ``type_``; empty by default
    subtype = Unicode(default_value="")
    # integer identifier of the process
    id_ = Int()
class EventSource(Component):
    """
    Base class for event sources reading different kinds of event files.

    One subclass should exist per type of event file read into ctapipe,
    e.g. sim_telarray files are read by the `HESSIOEventSource`.

    An event source gives a common high-level interface to event
    information coming from different data sources (simulation or
    different camera file formats), so that data can be accessed in the
    same way regardless of the file format.

    This class is abstract: use the subclass that matches the format of
    the file you are reading (for example
    `ctapipe.io.hessiofilereader.HESSIOEventSource` for a hessio format
    file), or let `ctapipe.io.eventfilereader.EventSourceFactory` select
    the correct subclass for you.

    An instance is created from the traitlet configuration (containing the
    input_url) and the `ctapipe.core.tool.Tool`. From inside a Tool:

    >>> event_source = EventSource(self.config, self)

    An example of how to use `ctapipe.core.tool.Tool` and
    `ctapipe.io.eventfilereader.EventSourceFactory` can be found in
    ctapipe/examples/calibration_pipeline.py.

    Outside a Tool, an instance can still be created by supplying an
    input_url directly:

    >>> event_source = EventSource(input_url="/path/to/file")

    To loop through the events in a file:

    >>> event_source = EventSource(input_url="/path/to/file")
    >>> for event in event_source:
    ...     print(event.count)

    **NOTE**: Every time a new loop is started through the event_source,
    it restarts from the first event.

    The event source can also be used in a `with` statement:

    >>> with EventSource(input_url="/path/to/file") as event_source:
    ...     for event in event_source:
    ...         print(event.count)

    **NOTE**: The "event" that is returned from the generator is a pointer.
    Any operation that progresses that instance of the generator further
    will change the data pointed to by "event". If you wish to ensure a
    particular event is kept, you should perform a
    `event_copy = copy.deepcopy(event)`.

    Attributes
    ----------
    input_url : str
        Path to the input event file.
    max_events : int
        Maximum number of events to loop through in generator
    metadata : dict
        A dictionary containing the metadata of the file. This could include:
        * is_simulation (bool indicating if the file contains simulated events)
        * Telescope:Camera names (list if file contains multiple)
        * Information in the file header
        * Observation ID
    """

    input_url = Unicode(
        '',
        help='Path to the input file containing events.'
    ).tag(config=True)
    max_events = Int(
        None,
        allow_none=True,
        help='Maximum number of events that will be read from the file'
    ).tag(config=True)
    allowed_tels = Set(
        help=(
            'list of allowed tel_ids, others will be ignored. '
            'If left empty, all telescopes in the input stream '
            'will be included'
        )
    ).tag(config=True)

    def __init__(self, config=None, tool=None, **kwargs):
        """
        Set up a generic event source for an input file.

        Parameters
        ----------
        config : traitlets.loader.Config
            Configuration specified by config file or cmdline arguments.
            Used to set traitlet values.
            Set to None if no configuration to pass.
        tool : ctapipe.core.Tool
            Tool executable that is calling this component.
            Passes the correct logger to the component.
            Set to None if no Tool to pass.
        kwargs
            Forwarded to the parent constructor.

        Raises
        ------
        FileNotFoundError
            If ``input_url`` does not exist.
        """
        super().__init__(config=config, parent=tool, **kwargs)

        self.metadata = {'is_simulation': False}

        path = self.input_url
        if not exists(path):
            raise FileNotFoundError(
                "file path does not exist: '{}'".format(path)
            )
        self.log.info("INPUT PATH = {}".format(path))

        # only announce the limit when one is actually set
        limit = self.max_events
        if limit:
            self.log.info("Max events being read = {}".format(limit))

        Provenance().add_input_file(path, role='dl0.sub.evt')

    @staticmethod
    @abstractmethod
    def is_compatible(file_path):
        """
        Abstract method to be defined in child class.

        Perform a set of checks to see if the input file is compatible
        with this file event_source.

        Parameters
        ----------
        file_path : str
            File path to the event file.

        Returns
        -------
        compatible : bool
            True if file is compatible, False if it is incompatible
        """

    @property
    def is_stream(self):
        """
        Bool indicating if input is a stream. If it is then it is incompatible
        with `ctapipe.io.eventseeker.EventSeeker`.

        TODO: Define a method to detect if it is a stream

        Returns
        -------
        bool
            If True, then input is a stream. This base implementation
            always returns False.
        """
        return False

    @abstractmethod
    def _generator(self):
        """
        Abstract method to be defined in child class.

        Generator where the filling of the `ctapipe.io.containers` occurs.

        Returns
        -------
        generator
        """

    def __iter__(self):
        """
        Iterate over `_generator`, stopping once an event's ``count``
        reaches `self.max_events` (when a limit is set).

        Returns
        -------
        generator
        """
        for event in self._generator():
            if self.max_events and event.count >= self.max_events:
                # limit reached: end the iteration
                return
            yield event

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # nothing to clean up in the base class
        return None
class Select(_Selection):
    """Listbox in which at most one item can be selected at a time."""

    # front-end view/model names
    _view_name = Unicode(default_value='SelectView').tag(sync=True)
    _model_name = Unicode(default_value='SelectModel').tag(sync=True)

    rows = Int(
        default_value=5,
        help="The number of rows to display.",
    ).tag(sync=True)
class Circle(Path):
    """Circle path layer, defined by a location and a radius."""

    # front-end view/model names
    _view_name = Unicode(default_value='LeafletCircleView').tag(sync=True)
    _model_name = Unicode(default_value='LeafletCircleModel').tag(sync=True)

    # defaults to the module-level ``def_loc``
    location = List(def_loc).tag(sync=True)
    radius = Int(
        default_value=1000,
        help="radius of circle in meters",
    ).tag(sync=True)
class Viewer(widgets.DOMWidget):
    """
    Generic object for viewing and labeling Candidate objects in their
    rendered Contexts.

    Subclasses must implement `_tag_context`.
    """
    _view_name = Unicode('ViewerView').tag(sync=True)
    _view_module = Unicode('viewer').tag(sync=True)
    cids = List().tag(sync=True)
    html = Unicode('<h3>Error!</h3>').tag(sync=True)
    _labels_serialized = Unicode().tag(sync=True)
    _selected_cid = Int().tag(sync=True)

    def __init__(self, candidates, session, gold=(), n_per_page=3, height=225, annotator_name=None):
        """
        Initializes a Viewer.

        The Viewer uses the keyword argument annotator_name to define a
        GoldLabelKey with that name.

        :param candidates: A Python container of Candidates (e.g., not a
            CandidateSet, but candidate_set.candidates)
        :param session: The SnorkelSession for the database backend
        :param gold: Optional, Python container of Candidates that are known
            to have positive labels
        :param n_per_page: Optional, number of Contexts to display per page
        :param height: Optional, the height in pixels of the Viewer
        :param annotator_name: Name of the human using the Viewer, for saving
            their work. Defaults to system username.
        :raises ValueError: if an existing label has a value other than 1 or -1
        """
        super(Viewer, self).__init__()
        self.session = session

        # By default, use the username as annotator name
        name = annotator_name if annotator_name is not None else getpass.getuser()

        # Sets up the annotation key to use, creating it on first use
        self.annotator = self.session.query(GoldLabelKey).filter(GoldLabelKey.name == name).first()
        if self.annotator is None:
            self.annotator = GoldLabelKey(name=name)
            session.add(self.annotator)
            session.commit()

        # Viewer display configs
        self.n_per_page = n_per_page
        self.height = height

        # Note that the candidates are not necessarily committed to the DB,
        # so they *may not have* non-null ids.
        # Hence, we index by their position in this list.
        # We get the sorted candidates and all contexts required, either
        # from unary or binary candidates.
        self.gold = list(gold)
        self.candidates = sorted(candidates, key=lambda c: c[0].char_start)
        self.contexts = list(set(c[0].get_parent() for c in self.candidates + self.gold))

        # If committed, sort contexts by id. This is best-effort: keep the
        # unsorted order on failure, but do not swallow KeyboardInterrupt /
        # SystemExit as a bare ``except`` would.
        try:
            self.contexts = sorted(self.contexts, key=lambda c: c.id)
        except Exception:
            pass

        # Loads existing annotations
        self.annotations = [None] * len(self.candidates)
        self.annotations_stable = [None] * len(self.candidates)
        init_labels_serialized = []
        for i, candidate in enumerate(self.candidates):

            # First look for the annotation in the primary annotations table
            existing_annotation = self.session.query(GoldLabel) \
                .filter(GoldLabel.key == self.annotator) \
                .filter(GoldLabel.candidate == candidate) \
                .first()
            if existing_annotation is None:
                continue

            self.annotations[i] = existing_annotation
            if existing_annotation.value == 1:
                value_string = 'true'
            elif existing_annotation.value == -1:
                value_string = 'false'
            else:
                raise ValueError(str(existing_annotation) +
                                 ' has value not in {1, -1}, which Viewer does not support.')
            init_labels_serialized.append(str(i) + '~~' + value_string)

            # If the annotator label is in the main table, also get its
            # stable version
            context_stable_ids = '~~'.join([c.stable_id for c in candidate.get_contexts()])
            existing_annotation_stable = self.session.query(StableLabel) \
                .filter(StableLabel.context_stable_ids == context_stable_ids) \
                .filter(StableLabel.annotator_name == name).one_or_none()

            # If stable version is not available, create it here
            # NOTE: This is for versioning issues, should be removed?
            if existing_annotation_stable is None:
                existing_annotation_stable = StableLabel(context_stable_ids=context_stable_ids,
                                                         annotator_name=self.annotator.name,
                                                         split=candidate.split,
                                                         value=existing_annotation.value)
                self.session.add(existing_annotation_stable)
                self.session.commit()

            self.annotations_stable[i] = existing_annotation_stable

        self._labels_serialized = ','.join(init_labels_serialized)

        # Configures message handler
        self.on_msg(self.handle_label_event)

        # display js, construct html and pass on to widget model
        self.render()

    def _tag_span(self, html, cids, gold=False):
        """
        Create the span around a segment of the context associated with one
        or more candidates / gold annotations.

        The span's CSS classes are 'candidate' (when `cids` is non-empty),
        'gold-annotation' (when `gold` is true) and each cid as a string.
        """
        classes = ['candidate'] if len(cids) > 0 else []
        classes += ['gold-annotation'] if gold else []
        classes += list(map(str, cids))
        # NOTE: `html` is inserted as-is; no character scrubbing is done here.
        return u'<span class="{classes}">{html}</span>'.format(classes=' '.join(classes), html=html)

    def _tag_context(self, context, candidates, gold):
        """Given the raw context, tag the spans using the generic _tag_span method"""
        raise NotImplementedError()

    def render(self):
        """Renders viewer pane"""
        cids = []

        # Iterate over pages of contexts
        pid = 0
        pages = []
        N = len(self.contexts)
        for i in range(0, N, self.n_per_page):
            page_cids = []
            lis = []
            for j in range(i, min(N, i + self.n_per_page)):
                context = self.contexts[j]

                # Get the candidates in this context
                candidates = [c for c in self.candidates if c[0].get_parent() == context]
                # NOTE(review): gold items are matched via g.get_parent() here
                # but via c[0].get_parent() in __init__ -- confirm intended.
                gold = [g for g in self.gold if g.get_parent() == context]

                # Construct the <li> and page view elements
                li_data = self._tag_context(context, candidates, gold)
                lis.append(LI_HTML.format(data=li_data, context_id=context.id))
                page_cids.append([self.candidates.index(c) for c in candidates])

            # Assemble the page; only the first page is initially visible
            pages.append(PAGE_HTML.format(
                pid=pid,
                data=''.join(lis),
                etc=' style="display: block;"' if i == 0 else ''
            ))
            cids.append(page_cids)
            pid += 1

        # Render in primary Viewer template
        self.cids = cids
        # Use context managers so the template files are closed promptly
        with open(os.path.join(directory, 'viewer.html')) as template_file:
            template = template_file.read()
        self.html = template % (self.height, ''.join(pages))
        with open(os.path.join(directory, 'viewer.js')) as script_file:
            script = script_file.read()
        display(Javascript(script))

    def _get_labels(self):
        """
        De-serialize labels from Javascript widget, map to internal candidate
        id, and return as list of tuples.

        Label strings other than 'true' / 'false' map to 0.
        """
        LABEL_MAP = {'true': 1, 'false': -1}
        labels = [x.split('~~') for x in self._labels_serialized.split(',') if len(x) > 0]
        vals = [(int(cid), LABEL_MAP.get(l, 0)) for cid, l in labels]
        return vals

    def handle_label_event(self, _, content, buffers):
        """
        Handles label event by persisting new label.

        'set_label' events create or update the label for `content['cid']`;
        'delete_label' events remove it. Other events are ignored.

        :raises ValueError: if a 'set_label' value is neither True nor False
        """
        event = content.get('event', '')
        if event == 'set_label':
            cid = content.get('cid', None)
            value = content.get('value', None)
            if value is True:
                value = 1
            elif value is False:
                value = -1
            else:
                raise ValueError('Unexpected label returned from widget: ' + str(value) +
                                 '. Expected values are True and False.')

            # If label already exists, just update value (in both the gold
            # label and the StableLabel)
            if self.annotations[cid] is not None:
                if self.annotations[cid].value != value:
                    self.annotations[cid].value = value
                    self.annotations_stable[cid].value = value
                    self.session.commit()

            # Otherwise, create a gold label *and a StableLabel*
            else:
                candidate = self.candidates[cid]

                # Create the gold label
                self.annotations[cid] = GoldLabel(key=self.annotator, candidate=candidate, value=value)
                self.session.add(self.annotations[cid])

                # Create StableLabel
                context_stable_ids = '~~'.join([c.stable_id for c in candidate.get_contexts()])
                self.annotations_stable[cid] = StableLabel(context_stable_ids=context_stable_ids,
                                                           annotator_name=self.annotator.name,
                                                           value=value,
                                                           split=candidate.split)
                self.session.add(self.annotations_stable[cid])
                self.session.commit()

        elif event == 'delete_label':
            cid = content.get('cid', None)
            # Only delete what actually exists: handing None to
            # session.delete() would raise for an unlabeled candidate.
            if self.annotations[cid] is not None:
                self.session.delete(self.annotations[cid])
                self.annotations[cid] = None
            if self.annotations_stable[cid] is not None:
                self.session.delete(self.annotations_stable[cid])
                self.annotations_stable[cid] = None
            self.session.commit()

    def get_selected(self):
        """Return the candidate at the currently selected index."""
        return self.candidates[self._selected_cid]