class SourceEmbeddingContextCreatorWidget(object):
    """Widget for creating, selecting and deleting named embedding contexts.

    A dropdown lists the saved configurations, a nested
    ``SourceEmbeddingContextWidget`` edits one configuration, a text box
    holds the name to save under, and Create / Delete buttons add to or
    remove from the saved collection. Calling the instance returns the
    top-level container widget.
    """

    def __init__(self, vocabs):
        """Assemble the child widgets and register their callbacks.

        Args:
            vocabs: Forwarded unchanged to ``SourceEmbeddingContextWidget``.
        """
        # Saved configurations, keyed by name, linked to the selector.
        self._ecs = CanonicalDict()
        self._w_ecs = widgets.Dropdown(description="Emb. Ctxs.", options=[])
        self._ecs.link_dropdown(self._w_ecs)
        self._w_ecs.observe(self._selector_handler, names=["value"])

        # Editor for the configuration currently being worked on.
        self._w_ec = SourceEmbeddingContextWidget(vocabs)

        create_button = widgets.Button(description="Create")
        create_button.on_click(self._create_handler)
        self._w_create = create_button

        delete_button = widgets.Button(description="Delete")
        delete_button.on_click(self._delete_handler)
        self._w_delete = delete_button

        default_name = "src-ec-{}".format(len(self._ecs))
        self._w_ec_name = widgets.Text(description="Name:",
                                       value=default_name)

        button_row = widgets.HBox([self._w_create, self._w_delete])
        layout = [self._w_ecs, self._w_ec(), self._w_ec_name, button_row]
        self._w_main = widgets.VBox(layout)

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _create_handler(self, button):
        """Save the editor's current params under the typed name.

        Nothing happens when the name is empty after stripping whitespace.
        Otherwise the entry is stored and selected in the dropdown.
        """
        ec_name = self._w_ec_name.value.strip()
        if ec_name != "":
            self._ecs[ec_name] = self._w_ec.params
            self._w_ecs.value = ec_name

    def _delete_handler(self, button):
        """Remove the entry currently selected in the dropdown, if any."""
        selected = self._w_ecs.value
        if selected is None:
            return
        del self._ecs[selected]

    def _selector_handler(self, change):
        """Load the newly selected entry into the editor and the name box."""
        selected = change["new"]
        if selected is None:
            return
        self._w_ec.params = self._ecs[selected]
        self._w_ec_name.value = selected

    @property
    def params(self):
        """Type tag plus the live collection of saved embedding contexts."""
        state = {"__type__": "SourceEmbeddingContextCreatorWidget"}
        state["embedding_contexts"] = self._ecs
        return state

    @params.setter
    def params(self, new_params):
        """Replace all saved entries with ``new_params["embedding_contexts"]``."""
        replacement = new_params["embedding_contexts"]
        self._ecs.clear()
        self._ecs.update(replacement)
# Example #2
class DatasetCreatorWidget(object):
    """Widget for saving, selecting and deleting named dataset configurations.

    A dropdown lists the saved datasets, a ``Seq2SeqFileDatasetWidget``
    edits one configuration, and Save / Delete buttons add to or remove
    from the saved collection. Calling the instance returns the top-level
    container widget.
    """

    def __init__(self, params=None):
        """Build the child widgets and register their callbacks.

        Args:
            params: Accepted but not used by this constructor. Assign to
                the ``params`` property to load a saved collection.
        """
        self._w_datasets = widgets.Dropdown(options=[], description="Dataset:")
        self._w_dataset = Seq2SeqFileDatasetWidget()
        self._w_save = widgets.Button(description="Save")
        self._w_delete = widgets.Button(description="Delete")
        # Saved configurations keyed by name, linked to the dropdown.
        self._datasets = CanonicalDict()
        self._datasets.link_dropdown(self._w_datasets)

        self._w_main = widgets.VBox([
            self._w_datasets,
            self._w_dataset(),
            widgets.HBox([self._w_save, self._w_delete]),
        ])

        self._w_datasets.observe(self._selector_change, names=["value"])
        self._w_save.on_click(self._save_callback)
        self._w_delete.on_click(self._delete_callback)

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _selector_change(self, change):
        """Load the newly selected dataset configuration into the editor."""
        name = change["new"]
        if name is not None:
            self._w_dataset.params = self._datasets[name]

    def _save_callback(self, button):
        """Store the editor's params under their ``"__name__"`` and select it.

        Does nothing when the name is ``None`` or blank, matching the other
        creator widgets, which refuse to create unnamed entries.
        """
        params = self._w_dataset.params
        name = params["__name__"]
        if name is None or (hasattr(name, "strip") and not name.strip()):
            return
        self._datasets[name] = params
        self._w_datasets.value = name

    def _delete_callback(self, button):
        """Remove the dataset currently selected in the dropdown, if any."""
        name = self._w_datasets.value
        if name is not None:
            del self._datasets[name]

    @property
    def params(self):
        """Type tag plus the live collection of saved datasets."""
        return {
            "__type__": "DatasetCreatorWidget",
            "datasets": self._datasets,
        }

    @params.setter
    def params(self, new_params):
        """Replace all saved datasets with ``new_params["datasets"]``."""
        self._datasets.clear()
        self._datasets.update(new_params["datasets"])

    @property
    def link(self):
        """The live collection of saved datasets, for sharing with other widgets."""
        return self._datasets
# Example #3
class SourceEmbeddingContextWidget(object):
    """Widget for editing a set of per-feature embedding configurations.

    Each entry is keyed by a feature name and records an embedding
    dimension, an embedding dropout value and the name of a vocabulary.
    Calling the instance returns the top-level container widget.
    """

    def __init__(self, vocabs):
        """Build the child widgets and register their callbacks.

        Args:
            vocabs: Collection of vocabularies. It must provide ``keys()``
                and ``link_dropdown(dropdown)``; both are called here.
        """
        self._vocabs = vocabs
        self._w_vocabs = widgets.Dropdown(options=vocabs.keys(),
                                          description="Vocab:")
        self._vocabs.link_dropdown(self._w_vocabs)

        # Saved per-feature entries, linked to their selector dropdown.
        self._emb_ctxs = CanonicalDict()
        self._w_emb_ctxs = widgets.Dropdown(options=[],
                                            description="Emb. Ctx.")
        self._emb_ctxs.link_dropdown(self._w_emb_ctxs)
        self._w_emb_ctxs.observe(self._selector_handler, names=["value"])

        self._w_ftr_name = widgets.Text(description="Feature:")
        # Bounded variants are used so the min/max limits are enforced;
        # the plain IntText/FloatText widgets have no min/max traits.
        self._w_emb_dim = widgets.BoundedIntText(value=512,
                                                 min=1,
                                                 max=9999,
                                                 description="Emb. Dim.")

        self._w_emb_dropout = widgets.BoundedFloatText(
            value=0,
            min=0,
            max=1,
            description="Emb. Dropout")
        self._w_add_ctx_button = widgets.Button(description="Add Emb. Ctx.")
        self._w_del_ctx_button = widgets.Button(description="Del. Emb. Ctx.")
        self._w_add_ctx_button.on_click(self._add_ctx_handler)
        self._w_del_ctx_button.on_click(self._del_ctx_handler)

        self._w_main = widgets.VBox([
            self._w_emb_ctxs,
            widgets.HBox([self._w_ftr_name, self._w_vocabs]),
            widgets.HBox([self._w_emb_dim, self._w_emb_dropout]),
            widgets.HBox([self._w_add_ctx_button, self._w_del_ctx_button]),
        ])

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _add_ctx_handler(self, button):
        """Save the current field values under the typed feature name.

        Nothing happens when the feature name is empty after stripping.
        An existing entry with the same feature name is replaced.
        """
        ftr = self._w_ftr_name.value.strip()
        if ftr == "":
            return
        params = {
            "embedding_dim": self._w_emb_dim.value,
            "embedding_dropout": self._w_emb_dropout.value,
            "feature": ftr,
            "vocab": self._w_vocabs.value,
        }
        self._emb_ctxs[ftr] = params
        self._w_emb_ctxs.value = ftr

    def _del_ctx_handler(self, button):
        """Remove the entry currently selected in the dropdown, if any."""
        ftr = self._w_emb_ctxs.value
        if ftr is None:
            return
        del self._emb_ctxs[ftr]

    def _selector_handler(self, change):
        """Copy the newly selected entry's values into the input fields."""
        ftr = change["new"]
        if ftr is None:
            return
        params = self._emb_ctxs[ftr]
        self._w_emb_dim.value = params["embedding_dim"]
        self._w_emb_dropout.value = params["embedding_dropout"]
        self._w_ftr_name.value = ftr
        self._w_vocabs.value = params["vocab"]

    @property
    def params(self):
        """Snapshot of all entries; each per-feature dict is a copy."""
        return {
            "embedding_contexts":
            {ftr: dict(vals)
             for ftr, vals in self._emb_ctxs.items()}
        }

    @params.setter
    def params(self, new_params):
        """Replace all entries with ``new_params["embedding_contexts"]``.

        Each per-feature dict is copied, mirroring the getter, so later
        changes to the caller's dicts do not alter this widget's state.
        """
        incoming = dict(new_params["embedding_contexts"])
        self._emb_ctxs.clear()
        self._emb_ctxs.update(
            {ftr: dict(vals) for ftr, vals in incoming.items()})
# Example #4
class WordListCreatorWidget(object):
    """Widget for deriving word-count lists from a part of a dataset.

    The user picks a dataset and a part ("source" or "target"); Create
    builds one word list per feature reported by the dataset and Delete
    removes the word list selected in the dropdown. Calling the instance
    returns the top-level container widget.
    """

    def __init__(self, datasets, params=None):
        """Build the child widgets and register their callbacks.

        Args:
            datasets: Collection of dataset configurations. It must provide
                ``keys()``, ``link_dropdown(dropdown)`` and item lookup.
            params: Accepted but not used by this constructor. Assign to
                the ``params`` property to load saved word lists.
        """
        self._datasets = datasets
        # Created word lists keyed by name, linked to the dropdown.
        self._word_lists = CanonicalDict()
        self._w_word_lists = widgets.Dropdown(options=[],
                                              description="Word Lists:")
        self._word_lists.link_dropdown(self._w_word_lists)

        self._w_datasets = widgets.Dropdown(options=datasets.keys(),
                                            description="Dataset:")
        self._datasets.link_dropdown(self._w_datasets)
        self._w_part = widgets.Dropdown(options=["source", "target"],
                                        description="Part")

        self._w_create = widgets.Button(description="Create")
        self._w_delete = widgets.Button(description="Delete")
        self._w_create.on_click(self._create_callback)
        # The Delete button was previously created without a handler, so
        # clicking it did nothing.
        self._w_delete.on_click(self._delete_callback)

        # Status line shown below the buttons; kept as an attribute so the
        # handler does not depend on its position in the layout.
        self._w_status = widgets.Label()

        self._w_main = widgets.VBox([
            self._w_word_lists,
            widgets.HBox([self._w_datasets, self._w_part]),
            widgets.HBox([self._w_create, self._w_delete]),
            self._w_status,
        ])

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _create_callback(self, button):
        """Create one word list per feature of the selected dataset part.

        Each list is stored under ``"<dataset>:<part>:<feature>"`` and the
        status label reports the names created. Nothing happens when no
        dataset or part is selected.
        """
        ds_name = self._w_datasets.value
        ds_part = self._w_part.value
        if ds_name is None or ds_part is None:
            return
        dataset = RAMDataset(self._datasets[ds_name][ds_part])
        # word_counts() is iterated as a mapping of feature -> counts.
        word_counts = dataset.word_counts()
        names = []
        for ftr, counts in word_counts.items():
            wl_name = ":".join([ds_name, ds_part, ftr])
            self._word_lists[wl_name] = counts
            names.append(wl_name)
            self._w_word_lists.value = wl_name
        self._w_status.value = "Created: {}".format(", ".join(names))

    def _delete_callback(self, button):
        """Remove the word list currently selected in the dropdown, if any."""
        wl_name = self._w_word_lists.value
        if wl_name is not None:
            del self._word_lists[wl_name]

    @property
    def params(self):
        """Type tag plus the live collection of word lists."""
        return {
            "__type__": "WordListCreatorWidget",
            "word_lists": self._word_lists,
        }

    @params.setter
    def params(self, new_params):
        """Replace all word lists with ``new_params["word_lists"]``."""
        self._word_lists.clear()
        self._word_lists.update(new_params["word_lists"])

    @property
    def link(self):
        """The live collection of word lists, for sharing with other widgets."""
        return self._word_lists
# Example #5
class SourceVocabCreatorWidget(object):
    """Widget for defining source vocabularies from word-count lists.

    The user picks a word list, a maximum vocabulary size and a minimum
    word frequency; the widget shows the resulting vocabulary size and the
    percentage of word occurrences left uncovered. Create stores the
    current settings under the given name and Delete removes the selected
    vocabulary. Calling the instance returns the top-level container.
    """

    def __init__(self, word_lists, params=None):
        """Build the child widgets and register their callbacks.

        Args:
            word_lists: Collection of word lists (name -> word -> count).
                It must provide ``keys()``, ``link_dropdown(dropdown)`` and
                item lookup.
            params: Accepted but not used by this constructor.
        """
        # Cached statistics. Initialised up front so that ``params`` and
        # the slider observers work before any word list is selected.
        self._sorted_word_list = []
        self._actual_vsize = 0
        self._unk_rate = 0.0

        # Saved vocabularies keyed by name, linked to the dropdown.
        self._vocabs = CanonicalDict()
        self._w_vocabs = widgets.Dropdown(options=[],
                                          description="Vocabs:")
        self._vocabs.link_dropdown(self._w_vocabs)
        self._w_vocabs.observe(self._selector_change, names=["value"])

        self._word_lists = word_lists
        self._w_word_lists = widgets.Dropdown(options=word_lists.keys(),
                                              description="Word Lists:")
        self._word_lists.link_dropdown(self._w_word_lists)
        self._w_vocab_size = widgets.IntSlider(min=1, max=1, value=1,
                                               description="Max V. Size:")
        self._w_vocab_size.observe(lambda x: self._calculate_vocab_size(),
                                   names=["value"])
        self._w_min_count = widgets.IntSlider(min=1, max=1, value=1,
                                              description="Min Freq.:")
        self._w_min_count.observe(lambda x: self._calculate_vocab_size(),
                                  names=["value"])
        self._w_actual_vocab_size = widgets.Label(
            description="Actual V. Size:")
        self._w_unk_rate = widgets.Label()

        self._w_start_token = widgets.Text(value="<sos>",
                                           description="Start Tkn:")
        self._w_pad_token = widgets.Text(value="<pad>",
                                         description="Pad Tkn:")
        self._w_unk_token = widgets.Text(value="<unk>",
                                         description="Unk Tkn:")
        # Same naming scheme as the name suggested after each Create.
        self._w_name = widgets.Text(
            value="source-vocab-{}".format(len(self._vocabs)),
            description="Name:")

        self._w_create = widgets.Button(description="Create")
        self._w_create.on_click(self._create_callback)

        self._w_delete = widgets.Button(description="Delete")
        self._w_delete.on_click(self._delete_callback)

        self._w_main = widgets.VBox([
            self._w_vocabs,
            widgets.HBox([self._w_word_lists, self._w_name]),
            widgets.HBox([self._w_min_count, self._w_vocab_size]),
            widgets.HBox([self._w_start_token]),
            widgets.HBox([self._w_pad_token, self._w_unk_token]),
            widgets.VBox([self._w_actual_vocab_size,
                          self._w_unk_rate]),
            widgets.HBox([self._w_create, self._w_delete]),
        ])

        # Registered only now: the callback writes to the sliders and
        # labels, so they must exist before it can fire.
        self._w_word_lists.observe(
            lambda x: self._select_word_list_callback(), names=["value"])
        self._select_word_list_callback()

    def __call__(self):
        """Return the top-level container widget."""
        return self._w_main

    def _select_word_list_callback(self):
        """Re-range the sliders for the selected word list and recompute.

        Does nothing when no word list is selected.
        """
        wl_name = self._w_word_lists.value
        if wl_name is None:
            return
        wl = self._word_lists[wl_name]
        # Highest count first; equal counts are ordered by word, descending.
        self._sorted_word_list = sorted(wl.items(),
                                        key=lambda x: (x[1], x[0]),
                                        reverse=True)

        # The slider minimum is 1, so an empty list still needs max >= 1.
        num_words = max(len(wl), 1)
        # Drop the value to the minimum before moving max, so the value is
        # always inside the slider's range.
        self._w_vocab_size.min = 1
        self._w_vocab_size.value = 1
        self._w_vocab_size.max = num_words
        self._w_vocab_size.value = num_words

        self._w_min_count.min = 0
        self._w_min_count.value = 0
        self._w_min_count.max = max(wl.values()) if wl else 0

        self._calculate_vocab_size()

    def _calculate_vocab_size(self):
        """Recompute vocabulary size and unknown-word rate; update labels.

        The vocabulary is the first ``max size`` entries of the sorted word
        list whose count is at least the minimum frequency. The unknown
        rate is the percentage of all word occurrences not covered by it,
        and is 0 when the word list has no occurrences.
        """
        top = self._sorted_word_list[:self._w_vocab_size.value]
        min_count = self._w_min_count.value
        kept = [wc for wc in top if wc[1] >= min_count]
        self._actual_vsize = len(kept)
        self._w_actual_vocab_size.value = "Actual Vocab Size: {}".format(
            self._actual_vsize)

        total_freq = sum(wc[1] for wc in self._sorted_word_list)
        if total_freq > 0:
            covered = sum(wc[1] for wc in kept)
            # float() keeps this a true division on Python 2 as well.
            self._unk_rate = 100 * (1 - covered / float(total_freq))
        else:
            self._unk_rate = 0.0
        self._w_unk_rate.value = "Unknown Word Rate: {:5.3f}%".format(
            self._unk_rate)

    @property
    def params(self):
        """Current vocabulary settings and derived statistics as a dict."""
        return {
            "__type__": "SourceVocabCreatorWidget",
            "word_list": self._w_word_lists.value,
            "max_size": self._w_vocab_size.value,
            "min_count": self._w_min_count.value,
            "start_token": self._w_start_token.value,
            "unk_token": self._w_unk_token.value,
            "pad_token": self._w_pad_token.value,
            "actual_vocab_size": self._actual_vsize,
            "unknown_word_rate": self._unk_rate,
            "name": self._w_name.value.strip(),
        }

    @params.setter
    def params(self, new_params):
        """Load vocabulary settings from a dict shaped like the getter's."""
        # The word list is set first; the sliders are assigned afterwards.
        self._w_word_lists.value = new_params["word_list"]
        self._w_vocab_size.value = new_params["max_size"]
        self._w_min_count.value = new_params["min_count"]
        self._w_start_token.value = new_params["start_token"]
        self._w_pad_token.value = new_params["pad_token"]
        self._w_unk_token.value = new_params["unk_token"]
        self._actual_vsize = new_params["actual_vocab_size"]
        self._unk_rate = new_params["unknown_word_rate"]
        self._w_name.value = new_params["name"]

    def _create_callback(self, button):
        """Save the current settings under their name and select them.

        Nothing happens when the name is empty. After saving, the name
        field is set to a fresh ``source-vocab-<n>`` suggestion.
        """
        params = self.params
        if params["name"] == "":
            return
        self._vocabs[params["name"]] = params
        self._w_vocabs.value = params["name"]
        self._w_name.value = "source-vocab-{}".format(len(self._vocabs))

    def _delete_callback(self, button):
        """Remove the vocabulary currently selected in the dropdown, if any."""
        name = self._w_vocabs.value
        if name is not None:
            del self._vocabs[name]

    def _selector_change(self, change):
        """Load the newly selected vocabulary's settings into the fields."""
        if change["new"] is not None:
            self.params = self._vocabs[change["new"]]

    @property
    def link(self):
        """The live collection of vocabularies, for sharing with other widgets."""
        return self._vocabs

    @link.setter
    def link(self, new_link):
        """Replace all saved vocabularies with the entries of ``new_link``."""
        self._vocabs.clear()
        self._vocabs.update(new_link)