class SourceEmbeddingContextCreatorWidget(object):
    """Composite widget for managing named source embedding contexts.

    The widget stacks, top to bottom: a dropdown listing the stored
    contexts, an embedded ``SourceEmbeddingContextWidget`` editor, a name
    text box and a row with "Create" / "Delete" buttons.  Stored contexts
    live in a ``CanonicalDict`` keyed by name; each value is whatever the
    editor's ``params`` property returned at creation time.
    """

    def __init__(self, vocabs):
        """Build the child widgets and register the event handlers.

        Args:
            vocabs: Forwarded unchanged to ``SourceEmbeddingContextWidget``.
        """
        # Store of named contexts and the dropdown that selects among them.
        self._ecs = CanonicalDict()
        self._w_ecs = widgets.Dropdown(options=[], description="Emb. Ctxs.")
        self._ecs.link_dropdown(self._w_ecs)
        self._w_ecs.observe(self._selector_handler, names=["value"])

        # Editor for the context currently being created or inspected.
        self._w_ec = SourceEmbeddingContextWidget(vocabs)

        create_button = widgets.Button(description="Create")
        create_button.on_click(self._create_handler)
        self._w_create = create_button

        delete_button = widgets.Button(description="Delete")
        delete_button.on_click(self._delete_handler)
        self._w_delete = delete_button

        default_name = "src-ec-{}".format(len(self._ecs))
        self._w_ec_name = widgets.Text(value=default_name,
                                       description="Name:")

        button_row = widgets.HBox([self._w_create, self._w_delete])
        self._w_main = widgets.VBox([
            self._w_ecs,
            self._w_ec(),
            self._w_ec_name,
            button_row,
        ])

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _create_handler(self, button):
        """Store the editor's current params under the name in the text box.

        Does nothing when the stripped name is empty.  After storing, the
        dropdown is pointed at the new entry.
        """
        name = self._w_ec_name.value.strip()
        if not name:
            return
        self._ecs[name] = self._w_ec.params
        self._w_ecs.value = name

    def _delete_handler(self, button):
        """Remove the entry currently selected in the dropdown, if any."""
        selected = self._w_ecs.value
        if selected is None:
            return
        del self._ecs[selected]

    def _selector_handler(self, change):
        """Load the newly selected entry into the editor and the name box."""
        selected = change["new"]
        if selected is None:
            return
        self._w_ec.params = self._ecs[selected]
        self._w_ec_name.value = selected

    @property
    def params(self):
        """Dict with a ``__type__`` tag and the store of embedding contexts.

        The store is returned by reference, not copied.
        """
        result = {"__type__": "SourceEmbeddingContextCreatorWidget"}
        result["embedding_contexts"] = self._ecs
        return result

    @params.setter
    def params(self, new_params):
        """Replace the stored contexts with ``new_params["embedding_contexts"]``."""
        incoming = new_params["embedding_contexts"]
        self._ecs.clear()
        self._ecs.update(incoming)
class DatasetCreatorWidget(object):
    """Composite widget for saving, selecting and deleting dataset entries.

    Layout: a dropdown of stored datasets, an embedded
    ``Seq2SeqFileDatasetWidget`` editor and a "Save" / "Delete" button row.
    Entries are kept in a ``CanonicalDict`` keyed by the ``"__name__"``
    field of the editor's params.
    """

    def __init__(self, params=None):
        """Build the child widgets and register the event handlers.

        Args:
            params: Accepted but not used by this constructor.
        """
        selector = widgets.Dropdown(options=[], description="Dataset:")
        editor = Seq2SeqFileDatasetWidget()
        save_button = widgets.Button(description="Save")
        delete_button = widgets.Button(description="Delete")

        self._w_datasets = selector
        self._w_dataset = editor
        self._w_save = save_button
        self._w_delete = delete_button

        # Store of saved datasets, linked to the selector dropdown.
        self._datasets = CanonicalDict()
        self._datasets.link_dropdown(selector)

        self._w_main = widgets.VBox([
            selector,
            editor(),
            widgets.HBox([save_button, delete_button]),
        ])

        selector.observe(self._selector_change, names=["value"])
        save_button.on_click(self._save_callback)
        delete_button.on_click(self._delete_callback)

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _selector_change(self, change):
        """Load the newly selected dataset entry into the editor."""
        selected = change["new"]
        if selected is None:
            return
        self._w_dataset.params = self._datasets[selected]

    def _save_callback(self, button):
        """Store the editor's params under their ``"__name__"`` and select it."""
        entry = self._w_dataset.params
        key = entry["__name__"]
        self._datasets[key] = entry
        self._w_datasets.value = entry["__name__"]

    def _delete_callback(self, button):
        """Remove the entry currently selected in the dropdown, if any."""
        selected = self._w_datasets.value
        if selected is None:
            return
        del self._datasets[selected]

    @property
    def params(self):
        """Dict with a ``__type__`` tag and the dataset store (by reference)."""
        result = {"__type__": "DatasetCreatorWidget"}
        result["datasets"] = self._datasets
        return result

    @params.setter
    def params(self, new_params):
        """Replace the stored datasets with ``new_params["datasets"]``."""
        incoming = new_params["datasets"]
        self._datasets.clear()
        self._datasets.update(incoming)

    @property
    def link(self):
        """The underlying dataset store, for sharing with other widgets."""
        return self._datasets
class SourceEmbeddingContextWidget(object):
    """Editor widget for a set of per-feature embedding settings.

    Each entry is keyed by feature name and records the embedding
    dimension, embedding dropout, feature name and chosen vocab.  Layout:
    a dropdown of stored entries, a feature-name / vocab row, a
    dimension / dropout row and an add / delete button row.
    """

    def __init__(self, vocabs):
        """Build the child widgets and register the event handlers.

        Args:
            vocabs: Mapping-like object providing ``keys()`` and
                ``link_dropdown()``; its keys seed the vocab dropdown.
        """
        self._vocabs = vocabs
        self._w_vocabs = widgets.Dropdown(options=vocabs.keys(),
                                          description="Vocab:")
        self._vocabs.link_dropdown(self._w_vocabs)

        # Store of per-feature entries and the dropdown selecting among them.
        self._emb_ctxs = CanonicalDict()
        self._w_emb_ctxs = widgets.Dropdown(options=[],
                                            description="Emb. Ctx.")
        self._emb_ctxs.link_dropdown(self._w_emb_ctxs)
        self._w_emb_ctxs.observe(self._selector_handler, names=["value"])

        # Input fields for a single entry.
        self._w_ftr_name = widgets.Text(description="Feature:")
        self._w_emb_dim = widgets.IntText(value=512, min=1, max=9999,
                                          description="Emb. Dim.")
        self._w_emb_dropout = widgets.FloatText(value=0, min=0, max=1,
                                                description="Emb. Dropout")

        add_button = widgets.Button(description="Add Emb. Ctx.")
        del_button = widgets.Button(description="Del. Emb. Ctx.")
        add_button.on_click(self._add_ctx_handler)
        del_button.on_click(self._del_ctx_handler)
        self._w_add_ctx_button = add_button
        self._w_del_ctx_button = del_button

        rows = [
            self._w_emb_ctxs,
            widgets.HBox([self._w_ftr_name, self._w_vocabs]),
            widgets.HBox([self._w_emb_dim, self._w_emb_dropout]),
            widgets.HBox([add_button, del_button]),
        ]
        self._w_main = widgets.VBox(rows)

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _add_ctx_handler(self, button):
        """Store the current field values under the stripped feature name.

        Does nothing when the feature name is empty after stripping.
        """
        feature = self._w_ftr_name.value.strip()
        if not feature:
            return
        self._emb_ctxs[feature] = {
            "embedding_dim": self._w_emb_dim.value,
            "embedding_dropout": self._w_emb_dropout.value,
            "feature": feature,
            "vocab": self._w_vocabs.value,
        }
        self._w_emb_ctxs.value = feature

    def _del_ctx_handler(self, button):
        """Remove the entry currently selected in the dropdown, if any."""
        feature = self._w_emb_ctxs.value
        if feature is not None:
            del self._emb_ctxs[feature]

    def _selector_handler(self, change):
        """Copy the newly selected entry's values into the input fields."""
        feature = change["new"]
        if feature is not None:
            entry = self._emb_ctxs[feature]
            self._w_emb_dim.value = entry["embedding_dim"]
            self._w_emb_dropout.value = entry["embedding_dropout"]
            self._w_ftr_name.value = feature
            self._w_vocabs.value = entry["vocab"]

    @property
    def params(self):
        """Dict holding a shallow copy of every stored entry.

        The result has the single key ``"embedding_contexts"`` mapping each
        feature name to a fresh ``dict`` copy of its settings.
        """
        copied = {}
        for feature, settings in self._emb_ctxs.items():
            copied[feature] = dict(settings)
        return {"embedding_contexts": copied}

    @params.setter
    def params(self, new_params):
        """Replace the stored entries with ``new_params["embedding_contexts"]``."""
        incoming = new_params["embedding_contexts"]
        self._emb_ctxs.clear()
        self._emb_ctxs.update(incoming)
class WordListCreatorWidget(object):
    """Composite widget that builds word-count lists from stored datasets.

    Layout: a dropdown of created word lists, a dataset / part selector
    row, a "Create" / "Delete" button row and a status label.  Word lists
    are kept in a ``CanonicalDict`` keyed by
    ``"<dataset>:<part>:<feature>"``.
    """

    def __init__(self, datasets, params=None):
        """Build the child widgets and register the event handlers.

        Args:
            datasets: Mapping-like object providing ``keys()``,
                ``link_dropdown()`` and item access by dataset name.
            params: Accepted but not used by this constructor.
        """
        self._datasets = datasets

        # Store of created word lists and the dropdown selecting among them.
        # NOTE: no observer is attached to this dropdown; selecting an entry
        # only determines what "Delete" removes.
        self._word_lists = CanonicalDict()
        self._w_word_lists = widgets.Dropdown(options=[],
                                              description="Word Lists:")
        self._word_lists.link_dropdown(self._w_word_lists)

        self._w_datasets = widgets.Dropdown(options=datasets.keys(),
                                            description="Dataset:")
        self._datasets.link_dropdown(self._w_datasets)
        self._w_part = widgets.Dropdown(options=["source", "target"],
                                        description="Part")

        self._w_create = widgets.Button(description="Create")
        self._w_delete = widgets.Button(description="Delete")
        self._w_create.on_click(self._create_callback)
        # The delete handler was previously defined but never registered,
        # so the Delete button had no effect.
        self._w_delete.on_click(self._delete_callback)

        # Status line reporting the names created by the last "Create".
        self._w_status = widgets.Label()

        self._w_main = widgets.VBox([
            self._w_word_lists,
            widgets.HBox([self._w_datasets, self._w_part]),
            widgets.HBox([self._w_create, self._w_delete]),
            self._w_status,
        ])

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _create_callback(self, button):
        """Compute word counts for the selected dataset part and store them.

        One word list is stored per feature returned by
        ``RAMDataset.word_counts()``.  The dropdown is pointed at the last
        list created and the status label lists all created names.  Does
        nothing when no dataset is selected.
        """
        ds_name = self._w_datasets.value
        if ds_name is None:
            # No dataset available yet; indexing the store would fail.
            return
        ds_part = self._w_part.value

        dataset = RAMDataset(self._datasets[ds_name][ds_part])
        names = []
        for ftr, counts in dataset.word_counts().items():
            wl_name = ds_name + ":" + ds_part + ":" + ftr
            self._word_lists[wl_name] = counts
            names.append(wl_name)

        # Guard against an empty result: there is no "last created" list
        # to select in that case.
        if names:
            self._w_word_lists.value = names[-1]
        self._w_status.value = "Created: {}".format(", ".join(names))

    def _delete_callback(self, button):
        """Remove the word list currently selected in the dropdown, if any."""
        wl_name = self._w_word_lists.value
        if wl_name is not None:
            del self._word_lists[wl_name]

    @property
    def params(self):
        """Dict with a ``__type__`` tag and the word-list store (by reference)."""
        return {
            "__type__": "WordListCreatorWidget",
            "word_lists": self._word_lists,
        }

    @params.setter
    def params(self, new_params):
        """Replace the stored word lists with ``new_params["word_lists"]``."""
        self._word_lists.clear()
        self._word_lists.update(new_params["word_lists"])

    @property
    def link(self):
        """The underlying word-list store, for sharing with other widgets."""
        return self._word_lists
class SourceVocabCreatorWidget(object):
    """Composite widget for defining source vocabularies from word lists.

    The user picks a word list, limits it with a maximum-size slider and a
    minimum-frequency slider, sets the special tokens and a name, and
    stores the resulting parameter dict in a ``CanonicalDict`` of vocabs.
    The resulting vocab size and unknown-word rate are shown live.
    """

    def __init__(self, word_lists, params=None):
        """Build the child widgets and register the event handlers.

        Args:
            word_lists: Mapping-like object providing ``keys()``,
                ``link_dropdown()`` and item access by word-list name; each
                value maps words to counts.
            params: Accepted but not used by this constructor.
        """
        # Derived statistics.  Initialised up front so that ``params`` and
        # the slider observers work even before a word list is selected.
        self._sorted_word_list = []
        self._actual_vsize = 0
        self._unk_rate = 0.0

        # Store of created vocabs and the dropdown selecting among them.
        self._vocabs = CanonicalDict()
        self._w_vocabs = widgets.Dropdown(options=[], description="Vocabs:")
        self._vocabs.link_dropdown(self._w_vocabs)
        self._w_vocabs.observe(self._selector_change, names=["value"])

        self._word_lists = word_lists
        self._w_word_lists = widgets.Dropdown(options=word_lists.keys(),
                                              description="Word Lists:")
        self._word_lists.link_dropdown(self._w_word_lists)

        self._w_vocab_size = widgets.IntSlider(min=1, max=1, value=1,
                                               description="Max V. Size:")
        self._w_min_count = widgets.IntSlider(min=1, max=1, value=1,
                                              description="Min Freq.:")
        self._w_actual_vocab_size = widgets.Label(
            description="Actual V. Size:")
        self._w_unk_rate = widgets.Label()

        self._w_start_token = widgets.Text(value="<sos>",
                                           description="Start Tkn:")
        self._w_pad_token = widgets.Text(value="<pad>",
                                         description="Pad Tkn:")
        self._w_unk_token = widgets.Text(value="<unk>",
                                         description="Unk Tkn:")
        # Default name uses the same "source-vocab-N" scheme that
        # ``_create_callback`` applies after each creation.
        self._w_name = widgets.Text(
            value="source-vocab-{}".format(len(self._vocabs)),
            description="Name:")

        self._w_create = widgets.Button(description="Create")
        self._w_create.on_click(self._create_callback)
        self._w_delete = widgets.Button(description="Delete")
        self._w_delete.on_click(self._delete_callback)

        # Observers are attached only after every widget they touch exists.
        self._w_word_lists.observe(
            lambda x: self._select_word_list_callback(), names=["value"])
        self._w_vocab_size.observe(
            lambda x: self._calculate_vocab_size(), names=["value"])
        self._w_min_count.observe(
            lambda x: self._calculate_vocab_size(), names=["value"])

        self._w_main = widgets.VBox([
            self._w_vocabs,
            widgets.HBox([self._w_word_lists, self._w_name]),
            widgets.HBox([self._w_min_count, self._w_vocab_size]),
            widgets.HBox([self._w_start_token]),
            widgets.HBox([self._w_pad_token, self._w_unk_token]),
            widgets.VBox([self._w_actual_vocab_size, self._w_unk_rate]),
            widgets.HBox([self._w_create, self._w_delete]),
        ])

        # Sync sliders and statistics with the initially selected list.
        self._select_word_list_callback()

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _select_word_list_callback(self):
        """Reset the sliders and statistics for the selected word list.

        Words are ordered by descending count, ties broken by descending
        word.  The size slider is set to ``[1, len(list)]`` with the value
        at the maximum; the frequency slider to ``[0, max count]`` with
        the value at 0.  Does nothing when no word list is selected.
        """
        wl_name = self._w_word_lists.value
        if wl_name is None:
            return
        wl = self._word_lists[wl_name]

        # One sort on (count, word) is equivalent to sorting by word and
        # then stably re-sorting by count, both descending.
        self._sorted_word_list = sorted(
            wl.items(), key=lambda wc: (wc[1], wc[0]), reverse=True)

        if wl:
            # Lower the value before changing ``max`` so the slider never
            # holds a value outside its bounds.
            self._w_vocab_size.min = 1
            self._w_vocab_size.value = 1
            self._w_vocab_size.max = len(wl)
            self._w_vocab_size.value = len(wl)

            self._w_min_count.min = 0
            self._w_min_count.value = 0
            self._w_min_count.max = max(wl.values())
        # An empty word list has no sensible bounds (``max()`` would
        # raise), so the sliders are left alone and only the statistics
        # are refreshed.

        self._calculate_vocab_size()

    def _calculate_vocab_size(self):
        """Recompute the actual vocab size and unknown-word rate labels.

        The vocab is the first ``max size`` words of the sorted list whose
        count is at least the minimum frequency.  The unknown-word rate is
        the percentage of total frequency not covered by those words, or
        0 when the total frequency is 0.
        """
        min_count = self._w_min_count.value
        kept = [wc for wc in self._sorted_word_list[:self._w_vocab_size.value]
                if wc[1] >= min_count]
        self._actual_vsize = len(kept)
        self._w_actual_vocab_size.value = "Actual Vocab Size: {}".format(
            self._actual_vsize)

        total_freq = sum(wc[1] for wc in self._sorted_word_list)
        if total_freq:
            known_freq = sum(wc[1] for wc in kept)
            # ``float`` keeps this a true division on every interpreter.
            self._unk_rate = 100 * (1 - known_freq / float(total_freq))
        else:
            # Nothing counted: avoid dividing by zero.
            self._unk_rate = 0.0
        self._w_unk_rate.value = "Unknown Word Rate: {:5.3f}%".format(
            self._unk_rate)

    @property
    def params(self):
        """Dict describing the vocab currently configured in the widget."""
        return {
            "__type__": "SourceVocabCreatorWidget",
            "word_list": self._w_word_lists.value,
            "max_size": self._w_vocab_size.value,
            "min_count": self._w_min_count.value,
            "start_token": self._w_start_token.value,
            "unk_token": self._w_unk_token.value,
            "pad_token": self._w_pad_token.value,
            "actual_vocab_size": self._actual_vsize,
            "unknown_word_rate": self._unk_rate,
            "name": self._w_name.value.strip(),
        }

    @params.setter
    def params(self, new_params):
        """Load a previously produced parameter dict into the widget.

        The word list is applied first, then the slider values, tokens,
        stored statistics and name.
        """
        self._w_word_lists.value = new_params["word_list"]
        self._w_vocab_size.value = new_params["max_size"]
        self._w_min_count.value = new_params["min_count"]
        self._w_start_token.value = new_params["start_token"]
        self._w_pad_token.value = new_params["pad_token"]
        self._w_unk_token.value = new_params["unk_token"]
        self._actual_vsize = new_params["actual_vocab_size"]
        self._unk_rate = new_params["unknown_word_rate"]
        self._w_name.value = new_params["name"]

    def _create_callback(self, button):
        """Store the current params under their name and select the entry.

        Does nothing when the name is empty.  Afterwards the name box is
        advanced to ``"source-vocab-<number of vocabs>"``.
        """
        params = self.params
        if params["name"] == "":
            return
        self._vocabs[params["name"]] = params
        self._w_vocabs.value = params["name"]
        self._w_name.value = "source-vocab-{}".format(len(self._vocabs))

    def _delete_callback(self, button):
        """Remove the vocab currently selected in the dropdown, if any."""
        # The selected key is the dropdown's ``value``; the widget itself
        # cannot be subscripted.
        name = self._w_vocabs.value
        if name is not None:
            del self._vocabs[name]

    def _selector_change(self, change):
        """Load the newly selected vocab's params into the widget."""
        if change["new"] is not None:
            self.params = self._vocabs[change["new"]]

    @property
    def link(self):
        """The underlying vocab store, for sharing with other widgets."""
        return self._vocabs

    @link.setter
    def link(self, new_link):
        """Replace the stored vocabs with the contents of ``new_link``."""
        self._vocabs.clear()
        self._vocabs.update(new_link)