def __init__(self,
             data_config,
             model_config="embc:16|embt:16|conv:48|l1:16|do:0.1|oc:BI"):
        """Create the embeddings, the iterated dilated convolutions and the head.

        Args:
            data_config: Mapping describing the dataset. Only
                ``data_config["num_tokens"]`` is read; it sets the number of
                rows of the character embedding table.
            model_config: Hyper-parameter string in the form shown by the
                default, parsed with ``utils.parse_model_params``. Keys read
                here: ``embc`` and ``embt`` (embedding widths), ``conv``
                (filter count given to the convolution stack), ``l1`` (width
                of the first linear layer), ``oc`` (output tag scheme name)
                and the optional ``do`` (dropout rate, 0 when absent).
        """
        super(Model, self).__init__()

        char_vocab = data_config["num_tokens"]

        hparams = utils.parse_model_params(model_config)
        n_filters = hparams["conv"]
        drop_p = hparams.get("do", 0)

        self.output_scheme = output_tags.get_scheme(hparams["oc"])

        # Character embedding table; index 0 is the padding entry.
        char_dim = hparams["embc"]
        self.ch_embeddings = nn.Embedding(char_vocab, char_dim, padding_idx=0)

        # Character-type embedding table (no padding index).
        n_char_types = char_type.get_total_char_types()
        type_dim = hparams["embt"]
        self.ch_type_embeddings = nn.Embedding(n_char_types, type_dim)

        # The convolution stack is sized for the sum of both embedding widths.
        self.id_conv = IteratedDilatedConvolutions(
            char_dim + type_dim,
            n_filters,
            drop_p,
        )

        hidden_dim = hparams["l1"]
        self.linear1 = nn.Linear(n_filters, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, self.output_scheme.num_tags)

        # Keep the raw configuration string on the instance.
        self.model_params = model_config
    def __init__(self, data_config, model_config="embc:16|embt:8|embs:8|cells:32|l1:16|bi:1|oc:BI"):
        """Create the embeddings, the LSTM encoder and the classification head.

        Args:
            data_config: Mapping describing the dataset. Reads
                ``num_char_tokens`` (rows of the character embedding table)
                and ``num_tokens`` (rows of the syllable embedding table).
            model_config: Hyper-parameter string in the form shown by the
                default, parsed with ``utils.parse_model_params``. Required
                keys: ``embc``, ``embt``, ``embs`` (embedding widths),
                ``cells`` and ``bi`` (passed to
                ``utils.compute_lstm_output_dim``), ``l1`` (width of the first
                linear layer) and ``oc`` (output tag scheme name). Optional
                keys: ``crf`` (a truthy value adds a CRF layer) and ``do``
                (dropout probability, 0 when absent).
        """
        super(Model, self).__init__()

        no_chars = data_config['num_char_tokens']
        log.info("no. characters: %d" % no_chars)

        no_syllables = data_config['num_tokens']
        log.info("no. syllables: %d" % no_syllables)

        config = utils.parse_model_params(model_config)

        self.output_scheme = output_tags.get_scheme(config["oc"])

        self.ch_type_embeddings = nn.Embedding(
            char_type.get_total_char_types(),
            config["embt"],
        )

        # Index 0 is the padding entry in both token embedding tables.
        self.ch_embeddings = nn.Embedding(
            no_chars,
            config["embc"],
            padding_idx=0
        )

        self.sy_embeddings = nn.Embedding(
            no_syllables,
            config["embs"],
            padding_idx=0
        )

        # "crf" is optional: the default model_config does not define it, so a
        # plain subscript would fail before the model is built. A present key
        # keeps its original truthiness semantics.
        if config.get("crf"):
            self.crf = CRF(self.output_scheme.num_tags, batch_first=True)

        emb_dim = config["embc"] + config["embs"] + config["embt"]

        num_cells, num_lstm_output, bi_direction = utils.compute_lstm_output_dim(
            config["cells"],
            config["bi"]
        )

        # "do" is optional as well; fall back to 0 when it is not given, as
        # the sibling constructors in this file do.
        self.dropout = nn.Dropout(config.get("do", 0))

        self.lstm = nn.LSTM(emb_dim, num_cells, bidirectional=bi_direction, batch_first=True)
        self.linear1 = nn.Linear(num_lstm_output, config["l1"])
        self.linear2 = nn.Linear(config["l1"], self.output_scheme.num_tags)

        # Keep the raw configuration string on the instance.
        self.model_params = model_config
Exemple #3
0
    def __init__(self, data_config, model_config="embc:16|embt:8|embs:8|conv:16|l1:16|do:0.0|oc:BI"):
        """Create the embeddings, two convolution layers and the head.

        Args:
            data_config: Mapping describing the dataset. Reads
                ``num_char_tokens`` (rows of the character embedding table)
                and ``num_tokens`` (rows of the syllable embedding table).
            model_config: Hyper-parameter string in the form shown by the
                default, parsed with ``utils.parse_model_params``. Keys read
                here: ``embc``, ``embt``, ``embs`` (embedding widths),
                ``conv`` (filter count of both convolution layers), ``l1``
                (width of the first linear layer), ``oc`` (output tag scheme
                name) and the optional ``do`` (dropout rate, 0 when absent).
        """
        super(Model, self).__init__()

        char_vocab = data_config['num_char_tokens']
        log.info("no. characters: %d" % char_vocab)

        syl_vocab = data_config['num_tokens']
        log.info("no. syllables: %d" % syl_vocab)

        hparams = utils.parse_model_params(model_config)
        n_filters = hparams['conv']
        drop_p = hparams.get("do", 0)

        self.output_scheme = output_tags.get_scheme(hparams["oc"])

        # Character-type embedding table (no padding index).
        n_char_types = char_type.get_total_char_types()
        type_dim = hparams["embt"]
        self.ch_type_embeddings = nn.Embedding(n_char_types, type_dim)

        # Character and syllable tables; index 0 is the padding entry.
        char_dim = hparams["embc"]
        self.ch_embeddings = nn.Embedding(char_vocab, char_dim, padding_idx=0)

        syl_dim = hparams["embs"]
        self.sy_embeddings = nn.Embedding(syl_vocab, syl_dim, padding_idx=0)

        # Input width of the first convolution: sum of the embedding widths.
        emb_dim = char_dim + syl_dim + type_dim

        self.dropout = torch.nn.Dropout(p=drop_p)

        # Two kernel-size-3 layers; the second keeps the channel count.
        self.conv1 = ConvolutionLayer(emb_dim, n_filters, 3)
        self.conv2 = ConvolutionLayer(n_filters, n_filters, 3, dilation=1)

        hidden_dim = hparams['l1']
        self.linear1 = nn.Linear(n_filters, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, self.output_scheme.num_tags)

        # Keep the raw configuration string on the instance.
        self.model_params = model_config
Exemple #4
0
    def __init__(
            self,
            data_config,
            model_config="embc:16|embt:8|embs:8|conv:16|l1:16|do:0.0|oc:BI"):
        """Create the embeddings, the iterated dilated convolutions and the head.

        Args:
            data_config: Mapping describing the dataset. Reads
                ``num_char_tokens`` (rows of the character embedding table)
                and ``num_tokens`` (only logged here); the mapping is also
                passed to ``prepare_embedding``.
            model_config: Hyper-parameter string in the form shown by the
                default, parsed with ``utils.parse_model_params``. Keys read
                here: ``embc``, ``embt`` (embedding widths), ``conv`` (filter
                count given to the convolution stack), ``l1`` (width of the
                first linear layer), ``oc`` (output tag scheme name) and the
                optional ``do`` (dropout rate, 0 when absent). A CRF layer is
                added whenever a ``crf`` key is present, whatever its value.
        """
        super(Model, self).__init__()

        char_vocab = data_config['num_char_tokens']
        log.info("no. characters: %d" % char_vocab)

        syl_vocab = data_config['num_tokens']
        log.info("no. syllables: %d" % syl_vocab)

        hparams = utils.parse_model_params(model_config)
        n_filters = hparams["conv"]
        drop_p = hparams.get("do", 0)

        self.output_scheme = output_tags.get_scheme(hparams["oc"])

        # Character-type embedding table (no padding index).
        n_char_types = char_type.get_total_char_types()
        type_dim = hparams["embt"]
        self.ch_type_embeddings = nn.Embedding(n_char_types, type_dim)

        # Character embedding table; index 0 is the padding entry.
        char_dim = hparams["embc"]
        self.ch_embeddings = nn.Embedding(char_vocab, char_dim, padding_idx=0)

        # The syllable table is built by a helper, so its width is read back
        # from the weight matrix below instead of from the configuration.
        self.sy_embeddings = prepare_embedding(data_config, hparams)

        if "crf" in hparams:
            self.crf = CRF(self.output_scheme.num_tags, batch_first=True)

        syl_dim = self.sy_embeddings.weight.shape[1]
        emb_dim = char_dim + type_dim + syl_dim

        self.id_conv = IteratedDilatedConvolutions(
            emb_dim,
            n_filters,
            drop_p,
        )

        hidden_dim = hparams['l1']
        self.linear1 = nn.Linear(n_filters, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, self.output_scheme.num_tags)

        # Keep the raw configuration string on the instance.
        self.model_params = model_config