def init_output_module(self):
    """
    Build the output module:
        Parallel((LookupTable + Sum(1)) + Identity) + MatVecProd

    Sets ``self.emb_out`` (the LookupTable) and ``self.mod_out`` (the
    assembled Sequential).
    """
    self.emb_out = LookupTable(self.voc_sz, self.out_dim)

    # First branch: embedding lookup followed by a Sum over dim 1.
    embed_then_sum = Sequential()
    for layer in (self.emb_out, Sum(dim=1)):
        embed_then_sum.add(layer)

    # Second branch is an Identity module.
    branches = Parallel()
    for branch in (embed_then_sum, Identity()):
        branches.add(branch)

    output_module = Sequential()
    for stage in (branches, MatVecProd(False)):
        output_module.add(stage)
    self.mod_out = output_module
def init_query_module(self):
    """
    Build the query module:
        Parallel((LookupTable + Sum(1)) + Identity)
        + MatVecProd with transpose + Softmax

    Sets ``self.emb_query`` (the LookupTable) and ``self.mod_query`` (the
    assembled Sequential).
    """
    self.emb_query = LookupTable(self.voc_sz, self.in_dim)

    # First branch: embedding lookup followed by a Sum over dim 1.
    embed_then_sum = Sequential()
    for layer in (self.emb_query, Sum(dim=1)):
        embed_then_sum.add(layer)

    # Second branch is an Identity module.
    branches = Parallel()
    for branch in (embed_then_sum, Identity()):
        branches.add(branch)

    query_module = Sequential()
    for stage in (branches, MatVecProd(True), Softmax()):
        query_module.add(stage)
    self.mod_query = query_module
class MemoryBoW(Memory):
    """
    MemoryBoW:
        Query module  = Parallel((LookupTable + Sum(1)) + Identity)
                        + MatVecProd with transpose + Softmax
        Output module = Parallel((LookupTable + Sum(1)) + Identity)
                        + MatVecProd
    """

    def __init__(self, config):
        super(MemoryBoW, self).__init__(config)

        # 'sz' is the number of sentences in a story, so the raw data has
        # size [num_words, num_sentences, batch_size].
        #
        # After embedding, the matrix becomes
        # [emb_dim, num_sentences, batch_size].
        data_shape = (config["max_words"], self.sz, config["bsz"])
        self.data = np.zeros(data_shape, np.float32)

    @staticmethod
    def _summed_embedding_branches(embedding):
        """Return Parallel((embedding + Sum(1)) + Identity)."""
        summed = Sequential()
        summed.add(embedding)
        summed.add(Sum(dim=1))

        branches = Parallel()
        branches.add(summed)
        branches.add(Identity())
        return branches

    def init_query_module(self):
        """
        Input query with size (num_words, num_questions)
        After embedding, with size (in_dim, num_questions)
        Which means that we compress each sentence into one word embedding
        """
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)
        branches = self._summed_embedding_branches(self.emb_query)

        query_module = Sequential()
        query_module.add(branches)
        query_module.add(MatVecProd(True))
        query_module.add(Softmax())
        self.mod_query = query_module

    def init_output_module(self):
        """
        Build the output module; same branches as the query module, but
        with a MatVecProd(False) and no Softmax.
        """
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)
        branches = self._summed_embedding_branches(self.emb_out)

        output_module = Sequential()
        output_module.add(branches)
        output_module.add(MatVecProd(False))
        self.mod_out = output_module
class MemoryL(Memory):
    """
    MemoryL:
        Query module  = Parallel((LookupTable + ElemMult + Sum(1)) + Identity)
                        + MatVecProd with transpose + Softmax
        Output module = Parallel((LookupTable + ElemMult + Sum(1)) + Identity)
                        + MatVecProd
    """

    def __init__(self, train_config):
        super(MemoryL, self).__init__(train_config)

        # Replace the base-class buffer with one that has a leading
        # max_words axis.
        data_shape = (
            train_config["max_words"],
            self.sz,
            train_config["bsz"],
        )
        self.data = np.zeros(data_shape, np.float32)

    def _weighted_embedding_branches(self, embedding):
        """Return Parallel((embedding + ElemMult + Sum(1)) + Identity)."""
        weighted = Sequential()
        weighted.add(embedding)
        # Element-wise multiplier taken from the "weight" config entry.
        weighted.add(ElemMult(self.config["weight"]))
        weighted.add(Sum(dim=1))

        branches = Parallel()
        branches.add(weighted)
        branches.add(Identity())
        return branches

    def init_query_module(self):
        """
        Build the query module; sets ``self.emb_query`` and
        ``self.mod_query``.
        """
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)
        branches = self._weighted_embedding_branches(self.emb_query)

        query_module = Sequential()
        query_module.add(branches)
        query_module.add(MatVecProd(True))
        query_module.add(Softmax())
        self.mod_query = query_module

    def init_output_module(self):
        """
        Build the output module; sets ``self.emb_out`` and ``self.mod_out``.
        """
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)
        branches = self._weighted_embedding_branches(self.emb_out)

        output_module = Sequential()
        output_module.add(branches)
        output_module.add(MatVecProd(False))
        self.mod_out = output_module
def init_query_module(self):
    """
    Input query with size (num_words, num_questions)
    After embedding, with size (in_dim, num_questions)
    Which means that we compress each sentence into one word embedding

    Layout:
        Parallel((LookupTable + Sum(1)) + Identity)
        + MatVecProd with transpose + Softmax
    """
    self.emb_query = LookupTable(self.voc_sz, self.in_dim)

    # Embedding lookup followed by a Sum over dim 1.
    sentence_embedding = Sequential()
    for layer in (self.emb_query, Sum(dim=1)):
        sentence_embedding.add(layer)

    # Pair that branch with an Identity branch.
    paired_inputs = Parallel()
    for branch in (sentence_embedding, Identity()):
        paired_inputs.add(branch)

    attention = Sequential()
    for stage in (paired_inputs, MatVecProd(True), Softmax()):
        attention.add(stage)
    self.mod_query = attention
class MemoryBoW(Memory):
    """
    MemoryBoW:
        Query module  = Parallel((LookupTable + Sum(1)) + Identity)
                        + MatVecProd with transpose + Softmax
        Output module = Parallel((LookupTable + Sum(1)) + Identity)
                        + MatVecProd
    """

    def __init__(self, config):
        super(MemoryBoW, self).__init__(config)
        # Replace the base-class buffer with one that has a leading
        # max_words axis.
        n_words, batch_size = config['max_words'], config['bsz']
        self.data = np.zeros((n_words, self.sz, batch_size), np.float32)

    def init_query_module(self):
        """
        Build the query module; sets ``self.emb_query`` and
        ``self.mod_query``.
        """
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)

        bag_of_words = Sequential()
        for layer in (self.emb_query, Sum(dim=1)):
            bag_of_words.add(layer)

        inputs = Parallel()
        for branch in (bag_of_words, Identity()):
            inputs.add(branch)

        module = Sequential()
        for stage in (inputs, MatVecProd(True), Softmax()):
            module.add(stage)
        self.mod_query = module

    def init_output_module(self):
        """
        Build the output module; sets ``self.emb_out`` and ``self.mod_out``.
        """
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)

        bag_of_words = Sequential()
        for layer in (self.emb_out, Sum(dim=1)):
            bag_of_words.add(layer)

        inputs = Parallel()
        for branch in (bag_of_words, Identity()):
            inputs.add(branch)

        module = Sequential()
        for stage in (inputs, MatVecProd(False)):
            module.add(stage)
        self.mod_out = module
class MemoryL(Memory):
    """
    MemoryL:
        Query module  = Parallel((LookupTable + ElemMult + Sum(1)) + Identity)
                        + MatVecProd with transpose + Softmax
        Output module = Parallel((LookupTable + ElemMult + Sum(1)) + Identity)
                        + MatVecProd
    """

    def __init__(self, train_config):
        super(MemoryL, self).__init__(train_config)
        # Replace the base-class buffer with one that has a leading
        # max_words axis.
        n_words, batch_size = train_config["max_words"], train_config["bsz"]
        self.data = np.zeros((n_words, self.sz, batch_size), np.float32)

    def init_query_module(self):
        """
        Build the query module; sets ``self.emb_query`` and
        ``self.mod_query``.
        """
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)

        # Embedding, element-wise multiply by config["weight"], then Sum(1).
        weighted_sum = Sequential()
        for layer in (self.emb_query,
                      ElemMult(self.config["weight"]),
                      Sum(dim=1)):
            weighted_sum.add(layer)

        inputs = Parallel()
        for branch in (weighted_sum, Identity()):
            inputs.add(branch)

        module = Sequential()
        for stage in (inputs, MatVecProd(True), Softmax()):
            module.add(stage)
        self.mod_query = module

    def init_output_module(self):
        """
        Build the output module; sets ``self.emb_out`` and ``self.mod_out``.
        """
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)

        # Embedding, element-wise multiply by config["weight"], then Sum(1).
        weighted_sum = Sequential()
        for layer in (self.emb_out,
                      ElemMult(self.config["weight"]),
                      Sum(dim=1)):
            weighted_sum.add(layer)

        inputs = Parallel()
        for branch in (weighted_sum, Identity()):
            inputs.add(branch)

        module = Sequential()
        for stage in (inputs, MatVecProd(False)):
            module.add(stage)
        self.mod_out = module
def _position_encoding(in_dim, max_words):
    """Return the (in_dim, max_words) float32 position-encoding weights."""
    # Centre the 1-based row/column indices. Dividing by 2.0 keeps this a
    # true division even when the module lacks `from __future__ import
    # division` on Python 2.
    row_offsets = np.arange(1, in_dim + 1) - (in_dim + 1) / 2.0
    col_offsets = np.arange(1, max_words + 1) - (max_words + 1) / 2.0
    centred = np.outer(row_offsets, col_offsets).astype(np.float32)
    return 1 + 4 * centred / (in_dim * max_words)


def build_model(general_config):
    """
    Build model

    NOTE: (for default config)
    1) Model's architecture (embedding B)
        LookupTable -> ElemMult -> Sum -> [ Duplicate -> { Parallel -> Memory -> Identity } -> AddTable ] -> LinearNB -> Softmax

    2) Memory's architecture
        a) Query module (embedding A)
            Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd -> Softmax

        b) Output module (embedding C)
            Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd

    Args:
        general_config: object exposing ``train_config``, ``dictionary``,
            ``use_bow``, ``nhops``, ``add_proj``, ``share_type``,
            ``enable_time`` and ``add_nonlin``. ``train_config`` must
            provide "in_dim", "out_dim", "max_words" and "voc_sz"; when
            ``use_bow`` is false its "weight" entry is (over)written with
            the position-encoding matrix.

    Returns:
        Tuple ``(memory, model, loss)``: ``memory`` is a dict mapping hop
        index to its memory module, ``model`` is the assembled Sequential
        and ``loss`` is the configured CrossEntropyLoss.
    """
    train_config = general_config.train_config
    dictionary = general_config.dictionary
    use_bow = general_config.use_bow
    nhops = general_config.nhops
    add_proj = general_config.add_proj
    share_type = general_config.share_type
    enable_time = general_config.enable_time
    add_nonlin = general_config.add_nonlin

    in_dim = train_config["in_dim"]
    out_dim = train_config["out_dim"]
    max_words = train_config["max_words"]
    voc_sz = train_config["voc_sz"]

    if not use_bow:
        print('We use PE')
        # Stored in train_config because MemoryL reads config["weight"].
        train_config["weight"] = _position_encoding(in_dim, max_words)

    memory = {}
    model = Sequential()
    model.add(LookupTable(voc_sz, in_dim))
    if not use_bow:
        if enable_time:
            print('We use TE')
            # Drop the last weight column for this (question-side) ElemMult.
            model.add(ElemMult(train_config["weight"][:, :-1]))
        else:
            model.add(ElemMult(train_config["weight"]))

    model.add(Sum(dim=1))

    proj = {}
    for i in range(nhops):
        if use_bow:
            memory[i] = MemoryBoW(train_config)
        else:
            memory[i] = MemoryL(train_config)

        # Override nil_word which is initialized in "self.nil_word = train_config["voc_sz"]"
        memory[i].nil_word = dictionary['nil']
        model.add(Duplicate())
        p = Parallel()
        p.add(memory[i])

        if add_proj:
            print('We add linear layer between internal states')
            proj[i] = LinearNB(in_dim, in_dim)
            p.add(proj[i])
        else:
            p.add(Identity())

        model.add(p)
        model.add(AddTable())
        if add_nonlin:
            print('We use non-linearity (RELU) to internal states')
            model.add(ReLU())

    model.add(LinearNB(out_dim, voc_sz, True))
    model.add(Softmax())

    # Share weights
    if share_type == 1:
        # Type 1: adjacent weight tying
        print('We use adjacent weight tying')
        memory[0].emb_query.share(model.modules[0])
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[i - 1].emb_out)

        # model.modules[-2] is the final LinearNB added above.
        model.modules[-2].share(memory[len(memory) - 1].emb_out)

    elif share_type == 2:
        # Type 2: layer-wise weight tying
        print('We use layer-wise weight tying (RNN-style)')
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[0].emb_query)
            memory[i].emb_out.share(memory[0].emb_out)

    if add_proj:
        for i in range(1, nhops):
            proj[i].share(proj[0])

    # Cost
    loss = CrossEntropyLoss()
    loss.size_average = False
    loss.do_softmax_bprop = True
    # model.modules[-1] is the final Softmax added above.
    model.modules[-1].skip_bprop = True

    return memory, model, loss
class Memory(Module):
    """
    Memory:
        Query module  = Parallel(LookupTable + Identity) + MatVecProd with transpose + Softmax
        Output module = Parallel(LookupTable + Identity) + MatVecProd
    """

    def __init__(self, train_config):
        super(Memory, self).__init__()

        self.sz = train_config["sz"]
        self.voc_sz = train_config["voc_sz"]
        self.in_dim = train_config["in_dim"]
        self.out_dim = train_config["out_dim"]

        # TODO: Mark self.nil_word and self.data as None since they will be
        # overridden eventually.
        # In build.model.py, memory[i].nil_word = dictionary['nil']
        self.nil_word = train_config["voc_sz"]
        self.config = train_config
        self.data = np.zeros((self.sz, train_config["bsz"]), np.float32)

        # Placeholders, populated by the two init_* calls below.
        self.emb_query = self.emb_out = None
        self.mod_query = self.mod_out = None
        self.probs = None

        self.init_query_module()
        self.init_output_module()

    def init_query_module(self):
        """Build Parallel(LookupTable + Identity) + MatVecProd(True) + Softmax."""
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)

        branches = Parallel()
        for branch in (self.emb_query, Identity()):
            branches.add(branch)

        query_module = Sequential()
        for stage in (branches, MatVecProd(True), Softmax()):
            query_module.add(stage)
        self.mod_query = query_module

    def init_output_module(self):
        """Build Parallel(LookupTable + Identity) + MatVecProd(False)."""
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)

        branches = Parallel()
        for branch in (self.emb_out, Identity()):
            branches.add(branch)

        output_module = Sequential()
        for stage in (branches, MatVecProd(False)):
            output_module.add(stage)
        self.mod_out = output_module

    def reset(self):
        """Overwrite every entry of the memory buffer with ``nil_word``."""
        self.data.fill(self.nil_word)

    def put(self, data_row):
        """Shift stored rows down by one and write ``data_row`` on top."""
        self.data[1:] = self.data[:-1]  # shift rows down
        self.data[0] = data_row         # add the new data row on top

    def fprop(self, input_data):
        """
        Run the query module on ``[data, input_data]`` to get ``probs``,
        then the output module on ``[data, probs]``; return its output.
        """
        memory = self.data
        self.probs = self.mod_query.fprop([memory, input_data])
        self.output = self.mod_out.fprop([memory, self.probs])
        return self.output

    def bprop(self, input_data, grad_output):
        """
        Back-propagate ``grad_output`` through the output module, then feed
        the second entry of its gradients through the query module. The
        second entry of the query gradients becomes ``grad_input``.
        """
        out_grads = self.mod_out.bprop([self.data, self.probs], grad_output)
        query_grads = self.mod_query.bprop(
            [self.data, input_data], out_grads[1])
        self.grad_input = query_grads[1]
        return self.grad_input

    def update(self, params):
        """Update both sub-modules, then zero the nil-word output column."""
        for module in (self.mod_out, self.mod_query):
            module.update(params)
        self.emb_out.weight.D[:, self.nil_word] = 0

    def share(self, m):
        """No-op."""
        pass
def _position_encoding(in_dim, max_words):
    """Return the (in_dim, max_words) float32 position-encoding weights."""
    # Centre the 1-based row/column indices. Dividing by 2.0 keeps this a
    # true division even when the module lacks `from __future__ import
    # division` on Python 2.
    row_offsets = np.arange(1, in_dim + 1) - (in_dim + 1) / 2.0
    col_offsets = np.arange(1, max_words + 1) - (max_words + 1) / 2.0
    centred = np.outer(row_offsets, col_offsets).astype(np.float32)
    return 1 + 4 * centred / (in_dim * max_words)


def build_model(general_config):
    """
    Build model

    NOTE: (for default config)
    1) Model's architecture (embedding B)
        LookupTable -> ElemMult -> Sum -> [ Duplicate -> { Parallel -> Memory -> Identity } -> AddTable ] -> LinearNB -> Softmax

    2) Memory's architecture
        a) Query module (embedding A)
            Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd -> Softmax

        b) Output module (embedding C)
            Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd

    Args:
        general_config: object exposing ``train_config``, ``dictionary``,
            ``use_bow``, ``nhops``, ``add_proj``, ``share_type``,
            ``enable_time`` and ``add_nonlin``. ``train_config`` must
            provide "in_dim", "out_dim", "max_words" and "voc_sz"; when
            ``use_bow`` is false its "weight" entry is (over)written with
            the position-encoding matrix.

    Returns:
        Tuple ``(memory, model, loss)``: ``memory`` is a dict mapping hop
        index to its memory module, ``model`` is the assembled Sequential
        and ``loss`` is the configured CrossEntropyLoss.
    """
    train_config = general_config.train_config
    dictionary = general_config.dictionary
    use_bow = general_config.use_bow
    nhops = general_config.nhops
    add_proj = general_config.add_proj
    share_type = general_config.share_type
    enable_time = general_config.enable_time
    add_nonlin = general_config.add_nonlin

    in_dim = train_config["in_dim"]
    out_dim = train_config["out_dim"]
    max_words = train_config["max_words"]
    voc_sz = train_config["voc_sz"]

    if not use_bow:
        # Stored in train_config because MemoryL reads config["weight"].
        train_config["weight"] = _position_encoding(in_dim, max_words)

    memory = {}
    model = Sequential()
    model.add(LookupTable(voc_sz, in_dim))
    if not use_bow:
        if enable_time:
            # Drop the last weight column for this (question-side) ElemMult.
            model.add(ElemMult(train_config["weight"][:, :-1]))
        else:
            model.add(ElemMult(train_config["weight"]))

    model.add(Sum(dim=1))

    proj = {}
    for i in range(nhops):
        if use_bow:
            memory[i] = MemoryBoW(train_config)
        else:
            memory[i] = MemoryL(train_config)

        # Override nil_word which is initialized in "self.nil_word = train_config["voc_sz"]"
        memory[i].nil_word = dictionary['nil']
        model.add(Duplicate())
        p = Parallel()
        p.add(memory[i])

        if add_proj:
            proj[i] = LinearNB(in_dim, in_dim)
            p.add(proj[i])
        else:
            p.add(Identity())

        model.add(p)
        model.add(AddTable())
        if add_nonlin:
            model.add(ReLU())

    model.add(LinearNB(out_dim, voc_sz, True))
    model.add(Softmax())

    # Share weights
    if share_type == 1:
        # Type 1: adjacent weight tying
        memory[0].emb_query.share(model.modules[0])
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[i - 1].emb_out)

        # model.modules[-2] is the final LinearNB added above.
        model.modules[-2].share(memory[len(memory) - 1].emb_out)

    elif share_type == 2:
        # Type 2: layer-wise weight tying
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[0].emb_query)
            memory[i].emb_out.share(memory[0].emb_out)

    if add_proj:
        for i in range(1, nhops):
            proj[i].share(proj[0])

    # Cost
    loss = CrossEntropyLoss()
    loss.size_average = False
    loss.do_softmax_bprop = True
    # model.modules[-1] is the final Softmax added above.
    model.modules[-1].skip_bprop = True

    return memory, model, loss