class topographic_grid(xy_grid):
    """
    Draw one x/y grid per Sheet from a pair of named SheetViews.

    Every Sheet in the simulation whose ``views.Maps`` contains both
    ``xsheet_view_name`` and ``ysheet_view_name`` is plotted by handing
    the latest data of those two views to ``xy_grid`` as ``x`` and
    ``y``.  The names default to XPreference and YPreference; pass
    other names as arguments to plot different SheetViews.
    """

    xsheet_view_name = param.String(default='XPreference',doc=""" Name of the SheetView holding the X position locations.""")

    ysheet_view_name = param.String(default='YPreference',doc=""" Name of the SheetView holding the Y position locations.""")

    # Disable and hide parameters inherited from the base class
    x = param.Array(constant=True, precedence=-1)
    y = param.Array(constant=True, precedence=-1)

    def __call__(self, **params):
        """Plot a grid for each Sheet that has both requested views."""
        p = ParamOverrides(self, params)
        x_name = p.xsheet_view_name
        y_name = p.ysheet_view_name

        for sheet in topo.sim.objects(Sheet).values():
            maps = sheet.views.Maps
            # Sheets lacking either view have nothing to plot.
            if x_name not in maps or y_name not in maps:
                continue

            x = maps[x_name].last.data
            y = maps[y_name].last.data

            suffix = "_" + sheet.name
            heading = ('Topographic mapping to ' + sheet.name + ' at time '
                       + topo.sim.timestr())
            super(topographic_grid, self).__call__(
                x=x, y=y, title=heading, filename_suffix=suffix)
class xy_grid(PylabPlotCommand):
    """
    By default, plot the x and y coordinate preferences as a grid.

    Each row and each column of the ``x``/``y`` arrays is drawn as a
    separate black line on a new 5x5 figure, which is then passed to
    ``_generate_figure`` and returned.
    """

    axis = param.Parameter(default=[-0.5, 0.5, -0.5, 0.5], doc=""" Four-element list of the plot bounds, i.e. [xmin, xmax, ymin, ymax].""" )

    skip = param.Integer(default=1, bounds=[1, None], softbounds=[1, 10], doc=""" Plot every skipth line in each direction. E.g. skip=4 means to keep only every fourth horizontal line and every fourth vertical line, except that the first and last are always included. The default is to include all data points.""")

    x = param.Array(doc="Numpy array of x positions in the grid.")

    y = param.Array(doc="Numpy array of y positions in the grid.")

    def __call__(self, **params):
        """
        Draw the grid and return the matplotlib figure.

        Keyword arguments override the corresponding parameters for
        this call only.
        """
        p = ParamOverrides(self, params)
        fig = plt.figure(figsize=(5, 5))

        # This one-liner works in Octave, but in matplotlib it
        # results in lines that are all connected across rows and columns,
        # so here we plot each line separately:
        #   plt.plot(x,y,"k-",transpose(x),transpose(y),"k-")
        # Here, the "k-" means plot in black using solid lines;
        # see matplotlib for more info.
        isint = plt.isinteractive()  # Temporarily make non-interactive for
        # plotting
        plt.ioff()
        try:
            for r, c in zip(p.y[::p.skip], p.x[::p.skip]):
                plt.plot(c, r, "k-")
            for r, c in zip(np.transpose(p.y)[::p.skip],
                            np.transpose(p.x)[::p.skip]):
                plt.plot(c, r, "k-")

            # Force last line avoid leaving cells open
            if p.skip != 1:
                plt.plot(p.x[-1], p.y[-1], "k-")
                plt.plot(np.transpose(p.x)[-1], np.transpose(p.y)[-1], "k-")

            plt.xlabel('x')
            plt.ylabel('y')
            # Currently sets the input range arbitrarily; should presumably
            # figure out what the actual possible range is for this
            # simulation (which would presumably be the maximum size of any
            # GeneratorSheet?).
            plt.axis(p.axis)
        finally:
            # Restore interactive mode even when plotting fails, so that an
            # error here does not leave the session non-interactive.
            if isint:
                plt.ion()

        self._generate_figure(p)
        return fig
def _promote_literal(value):
    """
    Return ``value`` unchanged if it is already a parameter, otherwise
    wrap it in the parameter type matching its Python type, with the
    value as the default.
    """
    if isinstance(value, param.Parameter):
        return value

    # Checked in order; bool comes before int because bool is a subclass
    # of int and would otherwise be declared as an Integer.
    literal_types = (
        (bool, param.Boolean),
        (int, param.Integer),
        (float, param.Number),
        (str, param.String),
        (dict, param.Dict),
        (tuple, param.Tuple),
        (list, param.List),
        (np.ndarray, param.Array),
    )
    for literal_type, parameter_type in literal_types:
        if isinstance(value, literal_type):
            return parameter_type(default=value)

    # Anything unrecognised becomes a generic Parameter.
    return param.Parameter(default=value)


def params_from_kwargs(**kwargs):
    """
    Turn keyword arguments into a dictionary of parameters.

    Values that are already parameters are passed through as they are;
    literal values are promoted to the parameter type matching their
    Python type, using the literal as the default value.
    """
    return {key: _promote_literal(value) for key, value in kwargs.items()}
class AudioFile(TimeSeries):
    """
    Requires an audio file in any format accepted by audiolab (wav, aiff,
    flac).

    The file named by ``filename`` is read once at construction time and
    stored in ``time_series`` together with its ``sample_rate``.
    """

    time_series = param.Array(precedence=(-1))
    sample_rate = param.Number(precedence=(-1))

    filename = param.Filename(default='sounds/complex/daisy.wav', doc=""" File path (can be relative to Param's base path) to an audio file. The audio can be in any format accepted by audiolab, e.g. WAV, AIFF, or FLAC.""" )

    precision = param.Parameter(default=float64, doc=""" The float precision to use for loaded audio files.""")

    def __init__(self, **params):
        super(AudioFile, self).__init__(**params)
        self._load_audio_file()

    def _load_audio_file(self):
        """Read every frame of ``filename`` into ``time_series``."""
        source = audiolab.Sndfile(self.filename, 'r')
        try:
            frames = source.read_frames(source.nframes, dtype=self.precision)
            sample_rate = source.samplerate
        finally:
            # Release the file handle whether or not reading succeeded.
            source.close()

        # audiolab scales the range by the bit depth automatically so the
        # dynamic range is now [-1.0, 1.0]
        # we rescale it to the range [0.0, 1.0]
        self.time_series = (frames + 1) / 2
        self.sample_rate = sample_rate
class HvDataset(param.Parameterized):
    '''Wraps a numpy image as a holoviews DynamicMap.

    The dataset is rebuilt through ``image_to_hvds`` from the current
    ``img``, ``label`` and ``spacing`` whenever the internal update counter
    or the label changes.
    '''

    img = param.Array(np.zeros((2, 2), dtype=np.uint8),
                      doc='numpy iamge array',
                      precedence=-1)
    label = param.String('channel',
                         doc='label for the generated hv.Dataset',
                         precedence=-1)
    spacing = param.Parameter((1, ), doc='pixel/voxel size', precedence=-1)
    _update_counter = param.Integer(0, precedence=-1)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._broadcast_spacing()

    @param.depends()
    def _broadcast_spacing(self):
        '''Expands ``spacing`` to a tuple with one entry per image axis.'''
        per_axis = np.broadcast_to(np.array(self.spacing), self.img.ndim)
        self.spacing = tuple(per_axis.tolist())

    @param.depends('img', watch=True)
    def _update_img(self):
        '''Re-broadcasts the spacing and bumps the counter on a new image.'''
        self._broadcast_spacing()
        self._update_counter = self._update_counter + 1

    # NOTE dynamic map with dependency directly on array is less responsive
    # (hash computation overhead?)
    @param.depends('_update_counter', 'label')
    def _build_dataset(self):
        '''Callback of the dynamic map: builds the dataset from the image.'''
        return image_to_hvds(self.img, self.label, self.spacing)

    @param.depends('spacing')
    def dmap(self):
        '''Returns a DynamicMap driven by ``_build_dataset``.'''
        return hv.DynamicMap(self._build_dataset, cache_size=1)
class TestSet(param.Parameterized):
    # Parameterized class declaring parameters ``a`` to ``v``, one for each
    # of a range of param Parameter types, each with a concrete default.

    # Names of the attributes below that are declared with numpy / pandas
    # based parameter types (``r`` is an Array; ``s``, ``t``, ``u`` are
    # DataFrames).
    numpy_params = ['r']
    pandas_params = ['s','t','u']
    # NOTE(review): presumably parameters that are only unsafe to handle
    # under some condition; the condition is not visible here -- confirm
    # against the code that reads this list.
    conditionally_unsafe = ['f', 'o']

    # Integer with bounds [2, 30): lower bound inclusive, upper exclusive.
    a = param.Integer(default=5, doc='Example doc', bounds=(2,30), inclusive_bounds=(True, False))
    b = param.Number(default=4.3, allow_None=True)
    c = param.String(default='foo')
    d = param.Boolean(default=False)
    # ``e`` restricts its items to int; ``f`` accepts any item type.
    e = param.List([1,2,3], class_=int)
    f = param.List([1,2,3])
    # The default is evaluated once, when the class body executes.
    g = param.Date(default=datetime.datetime.now())
    h = param.Tuple(default=(1,2,3), length=3)
    i = param.NumericTuple(default=(1,2,3,4))
    j = param.XYCoordinates(default=(32.1, 51.5))
    k = param.Integer(default=1)
    l = param.Range(default=(1.1,2.3), bounds=(1,3))
    m = param.String(default='baz', allow_None=True)
    n = param.ObjectSelector(default=3, objects=[3,'foo'], allow_None=False)
    # ``simple_list`` is defined outside this class.
    o = param.ObjectSelector(default=simple_list, objects=[simple_list], allow_None=False)
    p = param.ListSelector(default=[1,4,5], objects=[1,2,3,4,5,6])
    # The default is evaluated once, when the class body executes.
    q = param.CalendarDate(default=datetime.date.today())
    # The numpy / pandas based attributes are plain ``None`` when the
    # corresponding module is unavailable (``np`` / ``pd`` is None).
    # ``ndarray``, ``df1`` and ``df2`` are defined outside this class.
    r = None if np is None else param.Array(default=ndarray)
    s = None if pd is None else param.DataFrame(default=df1, columns=2)
    t = None if pd is None else param.DataFrame(default=pd.DataFrame(
        {'A':[1,2,3], 'B':[1.1,2.2,3.3]}), columns=(1,4), rows=(2,5))
    u = None if pd is None else param.DataFrame(default=df2, columns=['A', 'B'])
    v = param.Dict({'1':2})
class Lasso(LinkedStream):
    """
    A stream representing a lasso selection in 2D space as a two-column
    array of coordinates.
    """

    # Declared constant, so ordinary attribute assignment is rejected by
    # param once the stream is instantiated.
    geometry = param.Array(constant=True, doc=""" The coordinates of the lasso geometry as a two-column array.""")
class _BigDumbParams(param.Parameterized):
    # Parameterized class declaring one attribute for each of a wide range
    # of param Parameter types, each with a concrete default.
    # ``default_action``, ``CallableObject`` and ``FILE_DIR_DIR`` are
    # defined outside this class.

    action = param.Action(default_action, allow_None=True)
    array = param.Array(np.array([1.0, 2.0]))
    boolean = param.Boolean(True, allow_None=True)
    callable = param.Callable(default_action, allow_None=True)
    class_selector = param.ClassSelector(int, is_instance=False, allow_None=True)
    color = param.Color("#FFFFFF", allow_None=True)
    # Composite of the ``action`` and ``array`` attributes declared above.
    composite = param.Composite(["action", "array"], allow_None=True)
    # Declare the DataFrame with allow_None when possible, falling back to
    # a declaration without it if that raises TypeError.
    # NOTE(review): presumably some param versions do not accept allow_None
    # for DataFrame -- confirm.
    try:
        data_frame = param.DataFrame(
            pd.DataFrame({"A": 1.0, "B": np.arange(5)}), allow_None=True
        )
    except TypeError:
        data_frame = param.DataFrame(pd.DataFrame({"A": 1.0, "B": np.arange(5)}))
    # The default is evaluated once, when the class body executes.
    date = param.Date(datetime.now(), allow_None=True)
    date_range = param.DateRange((datetime.min, datetime.max), allow_None=True)
    dict_ = param.Dict({"foo": "bar"}, allow_None=True, doc="dict means dictionary")
    dynamic = param.Dynamic(default=default_action, allow_None=True)
    # The file-system parameters below all point at FILE_DIR_DIR or at its
    # "LICENSE" entry.
    file_selector = param.FileSelector(
        os.path.join(FILE_DIR_DIR, "LICENSE"),
        path=os.path.join(FILE_DIR_DIR, "*"),
        allow_None=True,
    )
    filename = param.Filename(
        os.path.join(FILE_DIR_DIR, "LICENSE"), allow_None=True
    )
    foldername = param.Foldername(os.path.join(FILE_DIR_DIR), allow_None=True)
    hook_list = param.HookList(
        [CallableObject(), CallableObject()], class_=CallableObject, allow_None=True
    )
    integer = param.Integer(10, allow_None=True)
    list_ = param.List([1, 2, 3], allow_None=True, class_=int)
    list_selector = param.ListSelector([2, 2], objects=[1, 2, 3], allow_None=True)
    magnitude = param.Magnitude(0.5, allow_None=True)
    multi_file_selector = param.MultiFileSelector(
        [],
        path=os.path.join(FILE_DIR_DIR, "*"),
        allow_None=True,
        check_on_set=True,
    )
    number = param.Number(-10.0, allow_None=True, doc="here is a number")
    numeric_tuple = param.NumericTuple((5.0, 10.0), allow_None=True)
    # Objects given as a name -> value mapping; note the "True" entry maps
    # to the integer 1, not to the boolean True.
    object_selector = param.ObjectSelector(
        False, objects={"False": False, "True": 1}, allow_None=True
    )
    path = param.Path(os.path.join(FILE_DIR_DIR, "LICENSE"), allow_None=True)
    range_ = param.Range((-1.0, 2.0), allow_None=True)
    series = param.Series(pd.Series(range(5)), allow_None=True)
    string = param.String("foo", allow_None=True, doc="this is a string")
    tuple_ = param.Tuple((3, 4, "fi"), allow_None=True)
    x_y_coordinates = param.XYCoordinates((1.0, 2.0), allow_None=True)
def define(cls, name, **kwargs):
    """
    Utility to quickly and easily declare Stream classes. Designed
    for interactive use such as notebooks and shouldn't replace
    parameterized class definitions in source code that is imported.

    Takes a stream class name and a set of keywords where each
    keyword becomes a parameter. A value that is already a parameter
    is used as it is; for any other value the parameter type is
    inferred from the value's Python type and declared as a constant
    parameter with the value as its default.

    Inferred types: bool, int, float, str, dict, tuple, list and
    numpy arrays; anything else becomes a generic Parameter.
    """
    # Checked in order; bool comes before int because bool is a subclass
    # of int and would otherwise be declared as an Integer.
    literal_types = (
        (bool, param.Boolean),
        (int, param.Integer),
        (float, param.Number),
        (str, param.String),
        (dict, param.Dict),
        (tuple, param.Tuple),
        (list, param.List),
        (np.ndarray, param.Array),
    )

    params = {'name': param.String(default=name)}
    for key, value in kwargs.items():
        if isinstance(value, param.Parameter):
            params[key] = value
            continue
        parameter_type = next(
            (ptype for ltype, ptype in literal_types
             if isinstance(value, ltype)),
            param.Parameter)
        params[key] = parameter_type(default=value, constant=True)

    # Dynamic class creation using type
    return type(name, (Stream,), params)
class TestSet(param.Parameterized):
    # Parameterized class with one numpy-backed and one pandas-backed
    # parameter.  Each attribute is plain ``None`` when the corresponding
    # module is unavailable (``np`` / ``pd`` is None).
    # ``ndarray`` and ``df`` are defined outside this class.

    array = None if np is None else param.Array(default=ndarray)
    data_frame = None if pd is None else param.DataFrame(default=df)
class Z(param.Parameterized):
    # Minimal Parameterized class with a single Array parameter whose
    # default (a one-element array) is passed positionally.

    z = param.Array(numpy.array([1]))
class OrthoSegmentationDashBoard(BaseImageDashBoard):
    '''Dashboard showing 3D, multi-channel images as a color composite in
    three orthogonal slice views.'''

    channel_config = param.Dict({}, doc='dictionnary configuring each channel')
    composite_channels = param.List(
        doc='ids of channels to be displayed as color composite')
    overlay_channels = param.List(
        doc='ids of channels to be displayed as overlay on top of the composite'
    )
    segmentation_viewer = param.Parameter(SegmentationViewer())

    hv_datasets = param.List()
    ortho_viewer = param.Parameter(OrthoViewer(add_crosshairs=False))
    spacing = param.Parameter((1, ), doc='pixel/voxel size', precedence=-1)
    init_position = param.Array(np.array([-1, -1, -1]))
    last_clicked_position = param.Array(np.array([]))

    _widget_update_counter = param.Integer(0)

    @param.depends('ortho_viewer.z_viewer.slice_id',
                   'ortho_viewer.y_viewer.slice_id',
                   'ortho_viewer.x_viewer.slice_id',
                   watch=True)
    def watch_position(self):
        '''Stores the current (z, y, x) slice ids, divided by the spacing and
        rounded to integers, in ``last_clicked_position``.'''
        viewer = self.ortho_viewer
        z = viewer.z_viewer.slice_id
        y = viewer.y_viewer.slice_id
        x = viewer.x_viewer.slice_id

        scaled = np.array(np.array((z, y, x)) / self.spacing)
        self.last_clicked_position = np.round(scaled).astype(int)

    @param.depends('_dynamic_update_counter', watch=True)
    def _dynamic_img_update(self):
        '''Pushes the loaded images into the existing datasets, pairwise.'''
        for dataset, image in zip(self.hv_datasets,
                                  self.loaded_objects.values()):
            dataset.img = image

    def dmap(self):
        '''Builds one dynamic map per orthogonal view.

        A new segmentation viewer is created first when the current one has
        no channel viewers or the multiselect has changed.
        '''
        viewer_is_stale = (not self.segmentation_viewer.channel_viewers
                           or self._has_multiselect_changed)
        if viewer_is_stale:
            selected_channel_config = {}
            for key in self.loaded_objects.keys():
                selected_channel_config[key] = self.channel_config[key]

            self.segmentation_viewer = SegmentationViewer.from_channel_config(
                selected_channel_config,
                composite_channels=self.composite_channels,
                overlay_channels=self.overlay_channels)
            self._widget_update_counter += 1

        self.hv_datasets = [
            HvDataset(img=image,
                      label=self.index_to_str(key),
                      spacing=self.spacing)
            for key, image in self.loaded_objects.items()
        ]

        channel_maps = [dataset.dmap() for dataset in self.hv_datasets]
        sliced_maps = [self.ortho_viewer(channel) for channel in channel_maps]

        # invert slices and channels
        per_view = list(zip(*sliced_maps))

        # add crosshair overlay, bug if adding to an existing overlay
        crosshairs = self.ortho_viewer.get_crosshair()
        with_crosshair = [
            views + lines for views, lines in zip(per_view, crosshairs)
        ]

        return [self.segmentation_viewer(view) for view in with_crosshair]

    @param.depends('_widget_update_counter')
    def widgets(self):
        '''Column holding the io widgets and the segmentation viewer widgets.'''
        return pn.Column(self.io_widgets, self.segmentation_viewer.widgets)

    @param.depends('_complete_update_counter')
    def _rebuild_panel(self):
        '''Creates a fresh ortho viewer and assembles the complete panel.'''
        self.ortho_viewer = OrthoViewer(add_crosshairs=False,
                                        target_position=self.init_position)

        layout = self.ortho_viewer.panel(self.dmap())

        # add the composite viewer above the orthoview widget (navigation
        # checkbox)
        own_widgets = self.widgets()
        layout[1][1] = pn.Column(own_widgets, layout[1][1])

        return layout

    def panel(self):
        return pn.Row(self._rebuild_panel)
class trainer(param.Parameterized):
    """
    Page that trains and tests a sentiment classifier.

    ``X`` holds the feature array and ``display_df`` the data frame that
    provides the labels.  After a run, ``result_string`` contains a
    Markdown summary (classification report, confusion matrix and accuracy
    score) and ``results`` is set to True, which switches the result pane
    from the data frame to that summary.
    """

    display_df = param.DataFrame(default=pd.DataFrame())

    results = param.Boolean(default=False)

    X = param.Array(default=None)

    result_string = param.String(default='')

    def __init__(self, **params):
        super().__init__(**params)
        self.name_of_page = 'Test and Train'

        self.test_slider = pn.widgets.IntSlider(name='Test Percentage',
                                                start=0,
                                                end=100,
                                                step=10,
                                                value=20)

        self.tt_button = pn.widgets.Button(name='Train and Test',
                                           button_type='primary')
        self.tt_button.on_click(self.train_test)

        self.tt_model = pn.widgets.Select(
            name='Select', options=['Random Forrest Classifier'])

    def train_test(self, event):
        """
        Button callback: split the data, fit the selected model, predict
        on the held-out part and analyze the predictions.

        Raises:
            ValueError: if the selected model is not supported.
        """
        # get values from sentiment.
        self.display_df = convert_sentiment_values(self.display_df)
        y = self.display_df['label']

        # get train test sets
        X_train, X_test, y_train, y_test = train_test_split(
            self.X,
            y,
            test_size=self.test_slider.value / 100,
            random_state=0)

        if self.tt_model.value == 'Random Forrest Classifier':
            sentiment_classifier = RandomForestClassifier(n_estimators=1000,
                                                          random_state=0)
            sentiment_classifier.fit(X_train, y_train)
            y_pred = sentiment_classifier.predict(X_test)
        else:
            # Without this, an unknown selection would only surface later
            # as an unbound-variable error on ``y_pred``.
            raise ValueError(
                'Unsupported model: {!r}'.format(self.tt_model.value))

        self.y_test = y_test
        self.y_pred = y_pred
        self.analyze()

    def analyze(self):
        """
        Compute the metrics for the stored predictions and publish them as
        Markdown in ``result_string``.
        """
        self.cm = confusion_matrix(self.y_test, self.y_pred)
        self.cr = classification_report(self.y_test, self.y_pred)
        self.acc_score = accuracy_score(self.y_test, self.y_pred)

        # Embed the whole report and every matrix row, so the summary is
        # complete for any number of classes rather than a fixed count.
        matrix_rows = '\n'.join(str(row) for row in self.cm.tolist())

        self.result_string = (
            '### Classification Report\n'
            '<pre>\n'
            f'{self.cr}\n'
            '</pre>\n'
            '\n'
            '### Confusion Matrix\n'
            '<pre>\n'
            f'{matrix_rows}\n'
            '</pre>\n'
            '\n'
            '### Accuracy Score\n'
            '<pre>\n'
            f'{round(self.acc_score, 4)}\n'
            '</pre>\n'
        )
        self.results = True

    def options_page(self, help_text):
        """Widget box with the help text and the training controls."""
        return pn.WidgetBox(help_text,
                            self.tt_model,
                            self.test_slider,
                            self.tt_button,
                            height=375,
                            width=300)

    @pn.depends('results')
    def df_pane(self):
        """Show the data frame until results exist, then the summary."""
        if not self.results:
            self.result_pane = self.display_df
        else:
            self.result_pane = pn.pane.Markdown(self.result_string,
                                                width=500,
                                                height=350)

        return pn.WidgetBox(self.result_pane, height=375, width=450)

    def panel(self):
        """Lay out the options on the left and the result pane on the right."""
        help_text = (
            "Your text will now be trained and tested using a selected model.  You may "
            + "choose a percentage of your data to reserve for testing, the rest will be used for "
            + "training.  For example, if I reserve 20%, the rest of the 80% will be used for training "
            + "and the 20% will be used to determine how well the trained model does assigning a "
            + "sentiment label to the testing text. Currently, the only model available is the sklearn "
            + "Random Forrest Classifier model.")

        return pn.Row(
            pn.Column(
                pn.pane.Markdown('##Train and Test'),
                self.options_page(help_text),
            ),
            pn.Column(
                pn.Spacer(height=52),
                self.df_pane,
            ))
class WordEmbedder(base_page):
    """
    Page that turns the text into word embeddings with the selected model.

    Three models are offered: 'SKLearn Count Vectorizer', 'Glove' and
    'Bert'.  The result is published in ``display_df`` (and, for the count
    vectorizer, in ``X``) and handed on through ``output``.
    """

    spark_df = param.ClassSelector(class_=sdf)
    display_df = param.DataFrame(default=pd.DataFrame())
    df = param.DataFrame()
    X = param.Array(default=None)

    def __init__(self, **params):
        super().__init__(**params)
        self.param.name_of_page.default = 'Word Embedding'
        self.we_model = pn.widgets.Select(
            name='Select',
            options=['SKLearn Count Vectorizer', 'Glove', 'Bert'])
        self.we_button = pn.widgets.Button(name='Transform',
                                           button_type='primary')
        self.we_button.on_click(self.transform)

    def options_page(self):
        """Widget box with the model selector and the transform button."""
        return pn.WidgetBox(self.we_model,
                            self.we_button,
                            height=300,
                            width=300)

    def transform(self, event):
        """
        Button callback: embed the text with the selected model.

        Exactly one branch runs per call; any selection other than 'Glove'
        or 'SKLearn Count Vectorizer' is handled as 'Bert'.
        """
        print('embedding')

        if self.we_model.value == 'Glove':
            print('glove')
            from sparknlp.annotator import WordEmbeddingsModel

            word_embeddings = WordEmbeddingsModel.pretrained()
            word_embeddings.setInputCols(['document', 'stem'])
            word_embeddings.setOutputCol('embeddings')

            self.spark_df = word_embeddings.transform(self.spark_df)

            embeddings_df = get_all_lines(self.spark_df,
                                          'embeddings.embeddings',
                                          col='embeddings')

        # ``elif`` rather than a second ``if``: otherwise a 'Glove'
        # selection would fall through to the ``else`` below and run the
        # Bert branch on top of the Glove result.
        elif self.we_model.value == 'SKLearn Count Vectorizer':
            from sklearn.feature_extraction.text import CountVectorizer

            print('join lines')
            corpus = join_lines(self.display_df)
            print('doing vectorizer')
            vectorizer = CountVectorizer(max_features=1500)
            print('vectorizing 2')
            X = vectorizer.fit_transform(corpus).toarray()

            cnt = self.spark_df.count()
            print('getting sentiment from spark df')
            # Each fetched row holds a single column; keep only its value.
            labels = [
                row[0]
                for row in self.spark_df.select('sentiment').take(cnt)
            ]
            print('done getting sentiment, creating dataframe')

            self.X = X
            embeddings_df = pd.DataFrame({
                'embeddings': [list(row) for row in X],
                'sentiment': labels,
            })

        else:
            print('bert')
            from sparknlp.annotator import BertEmbeddings

            bertEmbeddings = BertEmbeddings.pretrained()
            bertEmbeddings.setInputCols(['document', 'stem'])
            bertEmbeddings.setOutputCol('embeddings')

            self.spark_df = bertEmbeddings.transform(self.spark_df)

            embeddings_df = get_all_lines(self.spark_df,
                                          'embeddings.embeddings',
                                          col='embeddings')

        self.display_df = embeddings_df
        self.continue_button.disabled = False

    @param.output('X', 'display_df')
    def output(self):
        """Return the feature array and the data frame for the next page."""
        return self.X, self.display_df
class OrthoViewer(BaseViewer):
    '''Slices a 3D dataset along x,y and z axes and synchronizes the views.

    Three slice viewers (one per axis) show the xy, zy and xz planes. A tap
    on any plane updates ``target_position`` (z, y, x), which in turn moves
    all three slice viewers.
    '''

    navigaton_on = param.Boolean(True)

    z_viewer = param.Parameter(SliceViewer(axis='z'))
    x_viewer = param.Parameter(SliceViewer(axis='x'))
    y_viewer = param.Parameter(SliceViewer(axis='y'))

    xy_tap = param.Parameter(hv.streams.SingleTap(transient=True),
                             instantiate=True)
    zy_tap = param.Parameter(hv.streams.SingleTap(transient=True),
                             instantiate=True)
    xz_tap = param.Parameter(hv.streams.SingleTap(transient=True),
                             instantiate=True)

    target_position = param.Array(np.array([-1, -1, -1]))
    # Guard flag set while ``_update_target_position`` is moving the viewers.
    _updating_position = param.Boolean(False)
    add_crosshairs = param.Boolean(True)

    @param.depends()
    def _invert_axes(self, elem):
        '''Returns ``elem`` with its key dimensions in reversed order.'''
        # NOTE should use opts(invert_axes) instead but for some reason
        # it fails after zooming or panning
        return elem.reindex(elem.kdims[::-1])

    def get_crosshair(self):
        '''Creates the six crosshair lines from the current widget values.

        The lines are stored on the instance and returned as a list of
        (vertical, horizontal) pairs for the xy, zy and xz views.
        '''
        self.xy_v = hv.VLine(self.x_viewer._widget.value,
                             kdims=['x', 'y'],
                             label='xyV',
                             group='orthoview')
        self.xy_h = hv.HLine(self.y_viewer._widget.value,
                             kdims=['x', 'y'],
                             label='xyH',
                             group='orthoview')

        self.zy_v = hv.VLine(self.z_viewer._widget.value,
                             kdims=['za', 'y'],
                             label='zyV',
                             group='orthoview')
        self.zy_h = hv.HLine(self.y_viewer._widget.value,
                             kdims=['za', 'y'],
                             label='zyH',
                             group='orthoview')

        self.xz_v = hv.VLine(self.x_viewer._widget.value,
                             kdims=['x', 'zb'],
                             label='xzV',
                             group='orthoview')
        self.xz_h = hv.HLine(self.z_viewer._widget.value,
                             kdims=['x', 'zb'],
                             label='xzH',
                             group='orthoview')

        return [(self.xy_v, self.xy_h), (self.zy_v, self.zy_h),
                (self.xz_v, self.xz_h)]

    def _link_crosshairs(self):
        '''Sets the crosshair lines to the current slice ids and links each
        line to the slider of the matching viewer.

        Reads the line attributes assigned by ``get_crosshair``, which must
        therefore have been called beforehand.
        '''
        self.xy_v.data = self.x_viewer.slice_id
        self.xy_h.data = self.y_viewer.slice_id
        self.zy_v.data = self.z_viewer.slice_id
        self.zy_h.data = self.y_viewer.slice_id
        self.xz_v.data = self.x_viewer.slice_id
        self.xz_h.data = self.z_viewer.slice_id

        self._jslink_discrete_slider(self.x_viewer._widget, self.xy_v)
        self._jslink_discrete_slider(self.y_viewer._widget, self.xy_h)

        self._jslink_discrete_slider(self.z_viewer._widget, self.zy_v)
        self._jslink_discrete_slider(self.y_viewer._widget, self.zy_h)

        self._jslink_discrete_slider(self.x_viewer._widget, self.xz_v)
        self._jslink_discrete_slider(self.z_viewer._widget, self.xz_h)

    def _jslink_discrete_slider(self, widget, line):
        '''hack to jslink pn.widgets.DiscreteSlider to vertical/horizontal lines.
        links the underlying IntSlider and index list of available values'''

        # The widget's values are written into the script as a literal, so
        # the link reflects the values at the time this method is called.
        code = ''' var vals = {}; glyph.location = vals[source.value] '''.format(str(widget.values))

        return widget._slider.jslink(line, code={'value': code})

    @param.depends()
    def _update_dynamic_values(self, xy, zy, xz):
        '''render dummy plots to force updating the sliders, getting plot size, etc.'''

        self.frame_y_size = hv.render(xy).frame_height
        hv.render(zy)  # init slicer
        self.frame_z_size = hv.render(xz).frame_height

    def _call(self, dmap):
        '''Slices ``dmap`` along the three axes and returns the xy, zy and
        xz dynamic maps, registering them as sources of the tap streams.'''
        dmap_xy = self.z_viewer(dmap)
        dmap_zy = self.x_viewer(dmap).redim(z='za').apply(self._invert_axes)
        dmap_xz = self.y_viewer(dmap).redim(z='zb')

        self._init_tap_navigator(dmap_xy, dmap_zy, dmap_xz)

        self.z_viewer.slice_init = self.target_position[0]
        self.y_viewer.slice_init = self.target_position[1]
        self.x_viewer.slice_init = self.target_position[2]

        return (dmap_xy, dmap_zy, dmap_xz)

    @param.depends('target_position', watch=True)
    def _update_target_position(self):
        '''Moves the three slice viewers to ``target_position`` (z, y, x).

        Does nothing when a move is already in progress.
        '''
        if self._updating_position:
            return

        self._updating_position = True
        try:
            self.z_viewer.moveto(self.target_position[0])
            self.y_viewer.moveto(self.target_position[1])
            self.x_viewer.moveto(self.target_position[2])
        finally:
            # Always clear the guard; if it stayed set after a failed move,
            # every later position update would be silently skipped.
            self._updating_position = False

    @param.depends('xy_tap.x', 'xy_tap.y', watch=True)
    def _update_xy_sliders(self):
        '''Tap on the xy view: keeps z, takes y and x from the tap.'''
        if self.navigaton_on and self.xy_tap.x is not None and self.xy_tap.y is not None:
            self.target_position = np.array(
                [self.z_viewer.slice_id, self.xy_tap.y, self.xy_tap.x])

    @param.depends('zy_tap.x', 'zy_tap.y', watch=True)
    def _update_zy_sliders(self):
        '''Tap on the zy view: takes z and y from the tap, keeps x.'''
        if self.navigaton_on and self.zy_tap.x is not None and self.zy_tap.y is not None:
            self.target_position = np.array(
                [self.zy_tap.x, self.zy_tap.y, self.x_viewer.slice_id])

    @param.depends('xz_tap.x', 'xz_tap.y', watch=True)
    def _update_xz_sliders(self):
        '''Tap on the xz view: takes z and x from the tap, keeps y.'''
        if self.navigaton_on and self.xz_tap.x is not None and self.xz_tap.y is not None:
            self.target_position = np.array(
                [self.xz_tap.y, self.y_viewer.slice_id, self.xz_tap.x])

    def _init_tap_navigator(self, xy, zy, xz):
        '''Uses the three views as sources of their tap streams.'''
        self.xy_tap.source = xy
        self.zy_tap.source = zy
        self.xz_tap.source = xz

    def panel(self, dmaps):
        '''Lays out the three views with the navigation switch.

        ``dmaps`` is the (xy, zy, xz) triple. The crosshair lines are
        overlaid here only when ``add_crosshairs`` is set.
        '''
        xy, zy, xz = dmaps
        self._update_dynamic_values(xy, zy, xz)

        zy.opts(
            opts.Image(frame_width=self.frame_z_size,
                       frame_height=self.frame_y_size),
            opts.RGB(frame_width=self.frame_z_size,
                     frame_height=self.frame_y_size),
        )

        if self.add_crosshairs:
            self.get_crosshair()
            panel_xy = self.z_viewer.panel(
                (xy * self.xy_h * self.xy_v).relabel(group='orthoview'))
            panel_zy = self.x_viewer.panel(
                (zy * self.zy_h * self.zy_v).relabel(group='orthoview'))
            panel_xz = self.y_viewer.panel(
                (xz * self.xz_h * self.xz_v).relabel(group='orthoview'))
        else:
            panel_xy = self.z_viewer.panel(xy.relabel(group='orthoview'))
            panel_zy = self.x_viewer.panel(zy.relabel(group='orthoview'))
            panel_xz = self.y_viewer.panel(xz.relabel(group='orthoview'))

        # NOTE with add_crosshairs disabled the lines are not created here,
        # so the caller must have called get_crosshair() before this point.
        self._link_crosshairs()

        return pn.Column(pn.Row(panel_xy, panel_zy),
                         pn.Row(panel_xz, self.param.navigaton_on))
class PreProcessor(param.Parameterized):
    """
    Pre-processing page: load text, tokenize, remove stop words, stem and
    embed.

    Each step has its own tab of options.  Pressing the continue button
    runs all steps over the ``text`` column of ``display_df``, stores the
    resulting feature array in ``X``, replaces ``display_df`` with the
    embeddings and their ``sentiment`` labels, and sets ``ready``.
    """

    # df will be the variable holding the dataframe of text
    df = param.DataFrame()
    # title to display for each tab
    name_of_page = param.String(default='Name of page')
    # dataframe to display.
    display_df = param.DataFrame(default=pd.DataFrame())
    # stopword_df is the dataframe containing the stopewords
    stopword_df = param.DataFrame(default=pd.DataFrame())
    stopwords = param.List(default=[])
    X = param.Array(default=None)

    ready = param.Boolean(
        default=False,
        doc='trigger for moving to the next page',
    )

    def __init__(self, **params):
        super().__init__(**params)

        # button for the pre-processing page
        self.continue_button = pn.widgets.Button(name='Continue',
                                                 width=100,
                                                 button_type='primary')
        self.continue_button.on_click(self.continue_ready)

        # load text widgets
        self.header_checkbox = pn.widgets.Checkbox(
            name='Header included in file')
        self.load_file = pn.widgets.FileInput()
        self.load_file.link(self.df, callbacks={'value': self.load_df})

        # tokenize widgets
        # Raw string: '\w' is not a valid escape in a plain string literal.
        self.search_pattern_input = pn.widgets.TextInput(
            name='Search Pattern', value=r'\w+', width=100)

        # remove stop words widgets
        self.load_words_button = pn.widgets.FileInput()
        self.load_words_button.link(self.stopwords,
                                    callbacks={'value': self.load_stopwords})

        # stem widgets
        self.stem_choice = pn.widgets.Select(name='Select',
                                             options=['Porter', 'Snowball'])

        # embedding widgets
        self.we_model = pn.widgets.Select(
            name='Select', options=['SKLearn Count Vectorizer'])

    @param.output('X', 'display_df')
    def output(self):
        """Return the feature array and the data frame for the next page."""
        return self.X, self.display_df

    @param.depends('display_df')
    def df_pane(self):
        """Widget box showing the current ``display_df``."""
        return pn.WidgetBox(self.display_df, height=300, width=400)

    # load text page functions
    # -------------------------------------------------------------------------

    def load_df(self, df, event):
        """
        File-input callback: parse the uploaded bytes as CSV.

        With the header checkbox ticked the file is read as a regular CSV;
        otherwise every line becomes one row of a single ``text`` column.
        """
        info = io.BytesIO(self.load_file.value)
        if self.header_checkbox.value:
            self.df = pd.read_csv(info)
        else:
            self.df = pd.read_csv(info, sep='\n', header=None, names=['text'])

        self.display_df = self.df

    def load_text_page(self):
        """Tab for choosing the input file."""
        helper_text = (
            "This simple Sentiment Analysis NLP app will allow you to select a few different options "
            + "for some preprocessing steps to prepare your text for testing and training. "
            + "It will then allow you to choose a model to train, the percentage of data to "
            + "preserve for test, while the rest will be used to train the model.  Finally, "
            + "some initial metrics will be displayed to determine how well the model did to predict "
            + "the testing results."
            + " "
            + "Please choose a csv file that contains lines of text to analyze.  This text should "
            + "have a text column as well as a sentiment column.  If there is a header included in the file, "
            + "make sure to check the header checkbox."
        )
        return pn.Row(
            pn.Column(
                pn.pane.Markdown('##Load Text:'),
                pn.Column(
                    helper_text,
                    self.header_checkbox,
                    self.load_file
                ),
            ),
            pn.Column(
                pn.Spacer(height=52),
                self.df_pane,
            )
        )

    # -------------------------------------------------------------------------
    # tokenize page options
    # -------------------------------------------------------------------------

    def tokenize_option_page(self):
        """Tab for entering the tokenizer's regex search pattern."""
        help_text = (
            "Tokenization will break your text into a list of single articles "
            + "(ex. ['A', 'cat', 'walked', 'into', 'the', 'house', '.']).  Specify a regular "
            + "expression (regex) search pattern to use for splitting the text.")

        return pn.Column(
            pn.pane.Markdown('##Tokenize options:'),
            pn.WidgetBox(help_text, self.search_pattern_input,
                         height=300,
                         width=300)
        )

    # -------------------------------------------------------------------------
    # remove stopwords page
    # -------------------------------------------------------------------------

    def remove_stopwords_page(self):
        """Tab for uploading a stop-word list."""
        help_text = (
            "Stop words are words that do not add any value to the sentiment of the text. "
            + "Removing them may improve your sentiment results.  You may load a list of stop words "
            + "to exclude from your text."
        )

        return pn.Row(
            pn.Column(
                pn.pane.Markdown('##Load Stopwords:'),
                pn.WidgetBox(help_text, self.load_words_button,
                             height=300,
                             width=300)
            ),
            pn.Column(
                pn.Spacer(height=52),
                pn.WidgetBox(self.stopword_df,
                             height=300,
                             width=400)
            )
        )

    def load_stopwords(self, stopwords, event):
        """File-input callback: load the uploaded stop-word list."""
        info = io.BytesIO(self.load_words_button.value)
        # SECURITY: the uploaded bytes are unpickled, and unpickling can
        # execute arbitrary code. Only files from a trusted source are
        # safe to load here; a plain-text or JSON word list would remove
        # the risk.
        self.stopwords = pd.read_pickle(info)
        self.stopword_df = pd.DataFrame({'stop words': self.stopwords})

    # -------------------------------------------------------------------------
    # stemming page
    # -------------------------------------------------------------------------

    def stemmer_page(self):
        """Tab for choosing the stemmer."""
        help_text = (
            "Stemming is a normalization step for the words in your text.  Something that is "
            + "plural should probably still be clumped together with a singular version of a word, "
            + "for example.  Stemming will basically remove the ends of words.  Here you can choose "
            + "between a Porter Stemmer or Snowball Stemmer. Porter is a little less aggressive than "
            + "Snowball, however, Snowball is considered a slight improvement over Porter."
        )
        return pn.Column(
            pn.pane.Markdown('##Stemmer options:'),
            pn.WidgetBox(help_text, self.stem_choice,
                         height=300,
                         width=300)
        )

    # -------------------------------------------------------------------------
    # embedding page
    # -------------------------------------------------------------------------

    def word_embedding_page(self):
        """Tab for choosing the embedding model."""
        help_text = (
            "Embedding the process of turning words into numerical vectors. "
            + "There have been several algorithms developed to do this, however, currently in this "
            + "app, the sklearn count vectorizer is available. This algorithm will return a sparse "
            + "matrix represention of all the words in your text."
        )

        return pn.Column(
            pn.pane.Markdown('##Choose embedding model:'),
            pn.WidgetBox(help_text, self.we_model,
                         height=300,
                         width=300)
        )

    # -------------------------------------------------------------------------

    def continue_ready(self, event):
        """
        Continue-button callback: run the whole pre-processing pipeline.

        Every entry of the ``text`` column is tokenized, filtered against
        the stop words and stemmed; the resulting corpus is vectorized and
        the outcome stored in ``X`` and ``display_df``.
        """
        # Set up for tokenization
        tokenizer = RegexpTokenizer(self.search_pattern_input.value)

        # Set up for stemming
        if self.stem_choice.value == 'Porter':
            stemmer = PorterStemmer()
        else:
            # SnowballStemmer needs the language as its first argument.
            # NOTE(review): 'english' is assumed from the app's English
            # help texts and examples -- confirm for other corpora.
            stemmer = SnowballStemmer('english')

        # Set up for embedding
        if self.we_model.value == 'SKLearn Count Vectorizer':
            # Create a vectorizer instance
            vectorizer = CountVectorizer(max_features=1000)

        # Built once, so each membership test below is a hash lookup.
        stopwords = set(self.stopwords)

        corpus = []
        # loop through each line of data
        for sentence in self.display_df['text']:
            # 1. Tokenize
            tokens = tokenizer.tokenize(sentence)

            # 2. remove stop words
            tokens_no_sw = [word for word in tokens if word not in stopwords]

            # 3. stem the remaining words
            stem_words = [stemmer.stem(word) for word in tokens_no_sw]

            # Join the words back together as one string and append this
            # string to your corpus.
            corpus.append(' '.join(stem_words))

        X = vectorizer.fit_transform(corpus).toarray()
        labels = self.display_df['sentiment']

        self.X = X
        self.display_df = pd.DataFrame({
            'embeddings': [list(row) for row in X],
            'sentiment': labels,
        })

        self.ready = True

    def panel(self):
        """All option tabs followed by the continue button."""
        return pn.Column(
            pn.Tabs(
                ('Load Text', self.load_text_page),
                ('Tokenize', self.tokenize_option_page),
                ('Remove Stopwords', self.remove_stopwords_page),
                ('Stem', self.stemmer_page),
                ('Embed', self.word_embedding_page)
            ),
            self.continue_button
        )
class Z(param.Parameterized):
    # Minimal Parameterized class with a single Array parameter whose
    # default (a one-element array) is passed via the ``default`` keyword.

    z = param.Array(default=numpy.array([1]))
class EditableHvDataset(HvDataset):
    '''HvDataset whose label image can be edited by clicking.

    A click either picks the clicked label as the drawing label or fills
    the clicked label's region with the drawing label. Background and/or
    foreground pixels can be locked against such edits.
    '''

    locked_mask = param.Array(
        precedence=-1, doc='''mask of region that should not be updated''')
    drawing_label = param.Selector(default=1, objects=[-1, 0, 1])
    editor_switches = param.ObjectSelector(
        default='pick label', objects=['-', 'pick label', 'fill label'])
    locking_switches = param.ListSelector(default=[],
                                          objects=['background', 'foreground'])

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.update_locked_mask()
        self.update_drawing_label_list()

    def click_callback(self, coords):
        '''Applies the active editor switch at the clicked coordinates.

        Raises ValueError when the number of coordinates differs from the
        number of image dimensions.
        '''
        if len(coords) != self.img.ndim:
            message = 'Supplied coordinates: {} does not match the image dimensions: {}'
            raise ValueError(message.format(coords, self.img.ndim))

        pixel = tuple(int(round(c)) for c in coords)
        clicked_label = self.img[pixel]

        mode = self.editor_switches
        if mode == 'pick label':
            self.drawing_label = clicked_label
        elif mode == 'fill label':
            self.write_label(self.img == clicked_label)

    @param.depends('img', 'locking_switches', watch=True)
    def update_locked_mask(self):
        '''Recomputes the locked mask from the image and locking switches.'''
        locked = np.zeros_like(self.img, dtype=bool)

        if 'background' in self.locking_switches:
            locked |= self.img == 0

        if 'foreground' in self.locking_switches:
            locked |= self.img > 0

        self.locked_mask = locked

    def write_label(self, mask):
        '''Writes the drawing label into the unlocked part of ``mask``.'''
        relabeled = self.img.astype(np.int16)
        editable = mask & (~self.locked_mask)
        relabeled[editable] = self.drawing_label

        # assign new array to trigger updates
        self.img = relabeled

    @param.depends('img', watch=True)
    def update_drawing_label_list(self):
        '''List of label to choose from.'''
        highest_label = self.img.max()
        # add an extra label to annotate new objects
        choices = list(range(-1, highest_label + 2))

        self.param.drawing_label.objects = choices

        if self.drawing_label not in choices:
            self.drawing_label = -1

    def delete_label(self, event=None):
        '''Sets every pixel carrying the drawing label to -1.'''
        selected = self.img == self.drawing_label
        self.img[selected] = -1
        # re-assign the edited array to trigger updates
        self.img = self.img

    @param.depends('img')
    def _drawing_label_wg(self):
        return pn.panel(self.param.drawing_label)

    def widgets(self):
        '''Widget box with the label selector, editing and locking switches
        and the delete button.'''
        delete_button = pn.widgets.Button(name='delete selected label')
        delete_button.on_click(self.delete_label)

        editor_widget_types = {
            'editor_switches': {
                'type': pn.widgets.RadioButtonGroup
            }
        }
        editor_switches_wg = pn.Param(self.param.editor_switches,
                                      show_name=True,
                                      name="on click",
                                      widgets=editor_widget_types)

        locking_widget_types = {
            'locking_switches': {
                'type': pn.widgets.CheckButtonGroup
            }
        }
        locking_switches_wg = pn.Param(self.param.locking_switches,
                                       show_name=True,
                                       name='lock',
                                       widgets=locking_widget_types)

        return pn.WidgetBox(self._drawing_label_wg, editor_switches_wg,
                            locking_switches_wg, delete_button)