Example #1
class MemoryBoW(Memory):
    """
    MemoryBoW:
        Query module  = Parallel((LookupTable + Sum(1)) + Identity) + MatVecProd with transpose + Softmax
        Output module = Parallel((LookupTable + Sum(1)) + Identity) + MatVecProd
    """
    def __init__(self, config):
        super(MemoryBoW, self).__init__(config)
        # self.sz is the number of sentences in a story;
        # self.data has shape [max_words, num_sentences, batch_size].
        #
        # After the lookup-and-sum embedding below, a story becomes
        # a matrix of shape [emb_dim, num_sentences, batch_size].
        self.data = np.zeros((config["max_words"], self.sz, config["bsz"]), np.float32)

    def init_query_module(self):
        """
        Input query with size      (num_words, num_questions)
        After embedding, with size (in_dim, num_questions)

        Which means that we compress each
        sentence into one word embedding
        """
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)
        s = Sequential()
        s.add(self.emb_query)
        s.add(Sum(dim=1))

        p = Parallel()
        p.add(s)
        p.add(Identity())

        self.mod_query = Sequential()
        self.mod_query.add(p)
        self.mod_query.add(MatVecProd(True))
        self.mod_query.add(Softmax())

    def init_output_module(self):
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)
        s = Sequential()
        s.add(self.emb_out)
        s.add(Sum(dim=1))

        p = Parallel()
        p.add(s)
        p.add(Identity())

        self.mod_out = Sequential()
        self.mod_out.add(p)
        self.mod_out.add(MatVecProd(False))
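The Sum(dim=1) after the LookupTable is what makes this a bag-of-words encoder: all word embeddings in a sentence are pooled into one vector. Below is a minimal NumPy sketch of that pooling step with hypothetical sizes; it is independent of the repo's LookupTable/Sum modules and only illustrates the shapes described in the comments above.

import numpy as np

# Hypothetical sizes for illustration only.
voc_sz, emb_dim, num_words, num_sentences, bsz = 20, 8, 6, 4, 3
emb = np.random.randn(emb_dim, voc_sz).astype(np.float32)       # embedding matrix
story = np.random.randint(0, voc_sz, (num_words, num_sentences, bsz))

# Lookup: every word id selects a column of emb,
# giving shape (emb_dim, num_words, num_sentences, bsz).
embedded = emb[:, story]

# Bag-of-words pooling: sum out the word axis (dim=1),
# one vector per sentence -> (emb_dim, num_sentences, bsz).
bow = embedded.sum(axis=1)
print(bow.shape)  # (8, 4, 3)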
Example #2
class MemoryL(Memory):
    """
    MemoryL:
        Query module  = Parallel((LookupTable + ElemMult + Sum(1)) + Identity) + MatVecProd with transpose + Softmax
        Output module = Parallel((LookupTable + ElemMult + Sum(1)) + Identity) + MatVecProd
    """
    def __init__(self, train_config):
        super(MemoryL, self).__init__(train_config)
        self.data = np.zeros((train_config["max_words"], self.sz, train_config["bsz"]), np.float32)

    def init_query_module(self):
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)
        s = Sequential()
        s.add(self.emb_query)
        s.add(ElemMult(self.config["weight"]))
        s.add(Sum(dim=1))

        p = Parallel()
        p.add(s)
        p.add(Identity())

        self.mod_query = Sequential()
        self.mod_query.add(p)
        self.mod_query.add(MatVecProd(True))
        self.mod_query.add(Softmax())

    def init_output_module(self):
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)
        s = Sequential()
        s.add(self.emb_out)
        s.add(ElemMult(self.config["weight"]))
        s.add(Sum(dim=1))

        p = Parallel()
        p.add(s)
        p.add(Identity())

        self.mod_out = Sequential()
        self.mod_out.add(p)
        self.mod_out.add(MatVecProd(False))
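MemoryL differs from MemoryBoW only by the ElemMult step: before the sum, every embedding dimension is scaled by a per-(dimension, word-position) weight, the position-encoding matrix that build_model stores in train_config["weight"]. A hedged NumPy sketch of that weighted pooling, again with hypothetical sizes:

import numpy as np

emb_dim, max_words, num_sentences, bsz = 8, 6, 4, 3
embedded = np.random.randn(emb_dim, max_words, num_sentences, bsz).astype(np.float32)
weight = np.random.randn(emb_dim, max_words).astype(np.float32)  # stands in for train_config["weight"]

# ElemMult broadcasts the (emb_dim, max_words) weights over sentences
# and batch; Sum(dim=1) then pools over word positions as in MemoryBoW.
pe_bow = (embedded * weight[:, :, None, None]).sum(axis=1)
print(pe_bow.shape)  # (8, 4, 3)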
Example #3
def build_model(general_config):
    """
  Build model

  NOTE: (for default config)
  1) Model's architecture (embedding B)
    LookupTable -> ElemMult -> Sum -> [ Duplicate -> { Parallel -> Memory -> Identity } -> AddTable ] -> LinearNB -> Softmax

  2) Memory's architecture
    a) Query module (embedding A)
      Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd -> Softmax

    b) Output module (embedding C)
      Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd
  """
    train_config = general_config.train_config
    dictionary = general_config.dictionary
    use_bow = general_config.use_bow
    nhops = general_config.nhops
    add_proj = general_config.add_proj
    share_type = general_config.share_type
    enable_time = general_config.enable_time
    add_nonlin = general_config.add_nonlin

    in_dim = train_config["in_dim"]
    out_dim = train_config["out_dim"]
    max_words = train_config["max_words"]
    voc_sz = train_config["voc_sz"]

    if not use_bow:
        print('We use PE')  # position encoding: weights applied via ElemMult below
        train_config["weight"] = np.ones((in_dim, max_words), np.float32)
        for i in range(in_dim):
            for j in range(max_words):
                train_config["weight"][i][j] = (i + 1 - (in_dim + 1) / 2) * (
                    j + 1 - (max_words + 1) / 2)
        train_config["weight"] = 1 + 4 * train_config["weight"] / (in_dim *
                                                                   max_words)

    memory = {}
    model = Sequential()
    model.add(LookupTable(voc_sz, in_dim))
    if not use_bow:
        if enable_time:
            print('We use TE')  # temporal encoding: the last word slot is reserved, so drop its PE column
            model.add(ElemMult(train_config["weight"][:, :-1]))
        else:
            model.add(ElemMult(train_config["weight"]))

    model.add(Sum(dim=1))

    proj = {}
    for i in range(nhops):
        if use_bow:
            memory[i] = MemoryBoW(train_config)
        else:
            memory[i] = MemoryL(train_config)

        # Override nil_word, which Memory.__init__ set to train_config["voc_sz"],
        # with the dictionary's actual nil index
        memory[i].nil_word = dictionary['nil']
        model.add(Duplicate())
        p = Parallel()
        p.add(memory[i])

        if add_proj:
            print('We add linear layer between internal states')
            proj[i] = LinearNB(in_dim, in_dim)
            p.add(proj[i])
        else:
            p.add(Identity())

        model.add(p)
        model.add(AddTable())
        if add_nonlin:
            print('We apply a non-linearity (ReLU) to internal states')
            model.add(ReLU())

    model.add(LinearNB(out_dim, voc_sz, True))
    model.add(Softmax())

    # Share weights
    if share_type == 1:
        # Type 1: adjacent weight tying
        print('We use adjacent weight tying')
        memory[0].emb_query.share(model.modules[0])
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[i - 1].emb_out)

        model.modules[-2].share(memory[len(memory) - 1].emb_out)

    elif share_type == 2:
        # Type 2: layer-wise weight tying
        print('We use layer-wise weight tying (RNN-style)')
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[0].emb_query)
            memory[i].emb_out.share(memory[0].emb_out)

    if add_proj:
        for i in range(1, nhops):
            proj[i].share(proj[0])

    # Cost
    loss = CrossEntropyLoss()
    loss.size_average = False
    loss.do_softmax_bprop = True
    model.modules[-1].skip_bprop = True

    return memory, model, loss
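The double loop that fills train_config["weight"] evaluates the position-encoding formula one scalar at a time. The same matrix is an outer product, so it can be built in a few vectorized lines; this sketch should be numerically identical (np.allclose) to the loop above for any in_dim/max_words:

import numpy as np

def position_encoding(in_dim, max_words):
    # Same formula as the loop in build_model:
    # weight[i, j] = 1 + 4 * (i + 1 - (in_dim + 1) / 2)
    #                      * (j + 1 - (max_words + 1) / 2) / (in_dim * max_words)
    rows = np.arange(1, in_dim + 1, dtype=np.float32) - (in_dim + 1) / 2
    cols = np.arange(1, max_words + 1, dtype=np.float32) - (max_words + 1) / 2
    return 1 + 4 * np.outer(rows, cols) / (in_dim * max_words)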
Example #4
class Memory(Module):
    """
    Memory:
        Query module  = Parallel(LookupTable + Identity) + MatVecProd with transpose + Softmax
        Output module = Parallel(LookupTable + Identity) + MatVecProd
    """
    def __init__(self, train_config):
        super(Memory, self).__init__()

        self.sz        = train_config["sz"]
        self.voc_sz    = train_config["voc_sz"]
        self.in_dim    = train_config["in_dim"]
        self.out_dim   = train_config["out_dim"]

        # TODO: initialize self.nil_word (and self.data) as None; both are overridden later.
        # In build_model, memory[i].nil_word = dictionary['nil']
        self.nil_word  = train_config["voc_sz"]
        self.config    = train_config
        self.data      = np.zeros((self.sz, train_config["bsz"]), np.float32)

        self.emb_query = None
        self.emb_out   = None
        self.mod_query = None
        self.mod_out   = None
        self.probs     = None

        self.init_query_module()
        self.init_output_module()

    def init_query_module(self):
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)
        p = Parallel()
        p.add(self.emb_query)
        p.add(Identity())

        self.mod_query = Sequential()
        self.mod_query.add(p)
        self.mod_query.add(MatVecProd(True))
        self.mod_query.add(Softmax())

    def init_output_module(self):
        self.emb_out = LookupTable(self.voc_sz, self.out_dim)
        p = Parallel()
        p.add(self.emb_out)
        p.add(Identity())

        self.mod_out = Sequential()
        self.mod_out.add(p)
        self.mod_out.add(MatVecProd(False))

    def reset(self):
        self.data[:] = self.nil_word

    def put(self, data_row):
        self.data[1:, :] = self.data[:-1, :]  # shift rows down
        self.data[0, :] = data_row            # add the new data row on top

    def fprop(self, input_data):
        self.probs = self.mod_query.fprop([self.data, input_data])
        self.output = self.mod_out.fprop([self.data, self.probs])
        return self.output

    def bprop(self, input_data, grad_output):
        g1 = self.mod_out.bprop([self.data, self.probs], grad_output)
        g2 = self.mod_query.bprop([self.data, input_data], g1[1])
        self.grad_input = g2[1]
        return self.grad_input

    def update(self, params):
        self.mod_out.update(params)
        self.mod_query.update(params)
        self.emb_out.weight.D[:, self.nil_word] = 0

    def share(self, m):
        pass
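Putting the two modules together, fprop is the classic memory-network read: the query module scores every memory slot against the question (MatVecProd with transpose, then Softmax), and the output module returns the attention-weighted sum of output embeddings (plain MatVecProd). A self-contained NumPy sketch of that read, with hypothetical names and a single un-batched query:

import numpy as np

def memory_read(mem_in, mem_out, query):
    # mem_in, mem_out: (emb_dim, num_slots) embedded memories (A and C)
    # query:           (emb_dim,) embedded question u
    scores = mem_in.T @ query              # MatVecProd(True) -> (num_slots,)
    probs = np.exp(scores - scores.max())  # numerically stable Softmax
    probs /= probs.sum()
    return mem_out @ probs                 # MatVecProd(False) -> (emb_dim,)

out = memory_read(np.random.randn(8, 5), np.random.randn(8, 5), np.random.randn(8))
print(out.shape)  # (8,)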
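reset and put implement the fixed-size story buffer that self.data holds: reset fills every slot with the nil word, and each put shifts the buffer down one row so the newest sentence sits at index 0. A toy run of the same shift logic (sizes hypothetical):

import numpy as np

sz, bsz, nil_word = 4, 2, 0
data = np.full((sz, bsz), nil_word, np.float32)  # reset()

for row in (1, 2, 3):                            # three put() calls
    data[1:, :] = data[:-1, :]                   # shift rows down
    data[0, :] = row                             # newest row on top
print(data[:, 0])  # [3. 2. 1. 0.] -- newest first, nil padding below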