Code Example #1
    def init_output_module(self):
        # Output embedding (matrix C in MemN2N terms)
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)
        s = Sequential()
        s.add(self.emb_out)
        s.add(Sum(dim=1))  # bag-of-words: sum the word embeddings of each memory slot

        # Embed the memory contents on one branch; pass the attention
        # weights through unchanged on the other
        p = Parallel()
        p.add(s)
        p.add(Identity())

        self.mod_out = Sequential()
        self.mod_out.add(p)
        self.mod_out.add(MatVecProd(False))  # attention-weighted sum of output embeddings
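The output module consumes a pair [memory contents, attention weights] through the Parallel container. As a rough NumPy sketch of the computation (the function name and array shapes below are assumptions for illustration, not part of the original code):

import numpy as np

# Hypothetical shapes: emb_C is (voc_sz, out_dim), memory_words holds
# (num_mem, words_per_slot) token ids, attention is (num_mem,).
def output_module_forward(emb_C, memory_words, attention):
    # LookupTable -> Sum(dim=1): one out_dim vector per memory slot
    c = emb_C[memory_words].sum(axis=1)  # (num_mem, out_dim)
    # The MatVecProd step: o = sum_i attention_i * c_i
    return c.T.dot(attention)            # (out_dim,)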
Code Example #2
    def init_query_module(self):
        # Query-side embedding (matrix A in MemN2N terms)
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)
        s = Sequential()
        s.add(self.emb_query)
        s.add(Sum(dim=1))  # bag-of-words: sum the word embeddings of each memory slot

        # Embed the memory contents on one branch; pass the internal
        # state (query vector) through unchanged on the other
        p = Parallel()
        p.add(s)
        p.add(Identity())

        self.mod_query = Sequential()
        self.mod_query.add(p)
        self.mod_query.add(MatVecProd(True))  # inner product of each memory with the query
        self.mod_query.add(Softmax())         # attention weights over memory slots
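The query module receives [memory contents, internal state] and turns them into attention weights over the memory slots. A rough NumPy sketch (again, the function name and array shapes are assumptions for illustration):

import numpy as np

# Hypothetical shapes: emb_A is (voc_sz, in_dim), memory_words holds
# (num_mem, words_per_slot) token ids, u is the (in_dim,) internal state.
def query_module_forward(emb_A, memory_words, u):
    # LookupTable -> Sum(dim=1): one in_dim vector per memory slot
    m = emb_A[memory_words].sum(axis=1)  # (num_mem, in_dim)
    scores = m.dot(u)                    # MatVecProd: <m_i, u> per slot
    e = np.exp(scores - scores.max())    # numerically stable Softmax
    return e / e.sum()                   # attention over memory slots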
Code Example #3
import numpy as np

# The layer and memory classes used below (Sequential, Parallel, LookupTable,
# ElemMult, Sum, Duplicate, Identity, AddTable, LinearNB, ReLU, Softmax,
# MemoryL, MemoryBoW, CrossEntropyLoss) come from the project's own nn and
# memory modules.

def build_model(general_config):
    """
    Build model

    NOTE: (for default config)
    1) Model's architecture (embedding B)
      LookupTable -> ElemMult -> Sum -> [ Duplicate -> { Parallel -> Memory -> Identity } -> AddTable ] -> LinearNB -> Softmax

    2) Memory's architecture
      a) Query module (embedding A)
        Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd -> Softmax

      b) Output module (embedding C)
        Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd
    """
    train_config = general_config.train_config
    dictionary = general_config.dictionary
    use_bow = general_config.use_bow
    nhops = general_config.nhops
    add_proj = general_config.add_proj
    share_type = general_config.share_type
    enable_time = general_config.enable_time
    add_nonlin = general_config.add_nonlin

    in_dim = train_config["in_dim"]
    out_dim = train_config["out_dim"]
    max_words = train_config["max_words"]
    voc_sz = train_config["voc_sz"]

    if not use_bow:
        # Position Encoding (PE): weight word embeddings by position so that
        # word order within a sentence is not lost in the Sum below
        print('We use PE')
        train_config["weight"] = np.ones((in_dim, max_words), np.float32)
        for i in range(in_dim):
            for j in range(max_words):
                train_config["weight"][i][j] = (i + 1 - (in_dim + 1) / 2) * (
                    j + 1 - (max_words + 1) / 2)
        train_config["weight"] = 1 + 4 * train_config["weight"] / (in_dim *
                                                                   max_words)
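        # The loop above fills in l[k, j] = 1 + 4*(k - (d+1)/2)*(j - (J+1)/2)/(d*J)
        # with d = in_dim, J = max_words and 1-based k, j. A vectorized
        # equivalent (a sketch, not part of the original code):
        #   k = np.arange(1, in_dim + 1)[:, None]
        #   j = np.arange(1, max_words + 1)[None, :]
        #   train_config["weight"] = (1 + 4 * (k - (in_dim + 1) / 2)
        #                               * (j - (max_words + 1) / 2)
        #                               / (in_dim * max_words)).astype(np.float32)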

    memory = {}
    model = Sequential()
    model.add(LookupTable(voc_sz, in_dim))
    if not use_bow:
        if enable_time:
            print('We use TE')
            # Temporal Encoding: the last position is reserved for the time
            # word, so its column is dropped from the PE weights here
            model.add(ElemMult(train_config["weight"][:, :-1]))
        else:
            model.add(ElemMult(train_config["weight"]))

    model.add(Sum(dim=1))

    proj = {}
    for i in range(nhops):
        if use_bow:
            memory[i] = MemoryBoW(train_config)
        else:
            memory[i] = MemoryL(train_config)

        # Override nil_word, which the Memory constructor initializes to
        # train_config["voc_sz"]
        memory[i].nil_word = dictionary['nil']
        # Duplicate the internal state: one copy feeds the memory hop, the
        # other is carried around it and re-added by AddTable below
        model.add(Duplicate())
        p = Parallel()
        p.add(memory[i])

        if add_proj:
            print('We add linear layer between internal states')
            proj[i] = LinearNB(in_dim, in_dim)
            p.add(proj[i])
        else:
            p.add(Identity())

        model.add(p)
        model.add(AddTable())
        if add_nonlin:
            print('We use non-linearity (RELU) to internal states')
            model.add(ReLU())

    model.add(LinearNB(out_dim, voc_sz, True))
    model.add(Softmax())

    # Share weights
    if share_type == 1:
        # Type 1: adjacent weight tying (A^{k+1} = C^k, B = A^1, W = C^K)
        print('We use adjacent weight tying')
        memory[0].emb_query.share(model.modules[0])  # A^1 shares with B
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[i - 1].emb_out)  # A^{k+1} = C^k

        model.modules[-2].share(memory[len(memory) - 1].emb_out)  # W = C^K

    elif share_type == 2:
        # Type 2: layer-wise weight tying
        print('We use layer-wise weight tying (RNN-style)')
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[0].emb_query)
            memory[i].emb_out.share(memory[0].emb_out)

    if add_proj:
        for i in range(1, nhops):
            proj[i].share(proj[0])

    # Cost: cross-entropy, summed (not averaged) over the batch
    loss = CrossEntropyLoss()
    loss.size_average = False
    # The softmax gradient is computed inside the loss, so the model's final
    # Softmax layer skips its own backward pass
    loss.do_softmax_bprop = True
    model.modules[-1].skip_bprop = True

    return memory, model, loss
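A hypothetical call site, to show how the pieces fit together (the config class and its field values here are invented for illustration; the real project supplies its own config object along with the Memory and nn classes):

# Hypothetical usage; DemoConfig and these values are not from the original code.
class DemoConfig:
    pass

config = DemoConfig()
config.train_config = {"in_dim": 20, "out_dim": 20, "max_words": 30, "voc_sz": 177}
config.dictionary = {"nil": 0}
config.use_bow = False      # use Position Encoding instead of plain BoW
config.nhops = 3
config.add_proj = False
config.share_type = 1       # 1 = adjacent tying, 2 = layer-wise (RNN-style) tying
config.enable_time = True   # Temporal Encoding
config.add_nonlin = False

memory, model, loss = build_model(config)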