Example 1
def train_linear_start(train_story, train_questions, train_qstory, memory,
                       model, loss, general_config):

    train_config = general_config.train_config

    # Remove softmax from memory
    for i in range(general_config.nhops):
        memory[i].mod_query.modules.pop()

    # Save settings
    nepochs2 = general_config.nepochs
    lrate_decay_step2 = general_config.lrate_decay_step
    init_lrate2 = train_config["init_lrate"]

    # Add new settings
    general_config.nepochs = general_config.ls_nepochs
    general_config.lrate_decay_step = general_config.ls_lrate_decay_step
    train_config["init_lrate"] = general_config.ls_init_lrate

    # Train with new settings
    train(train_story, train_questions, train_qstory, memory, model, loss,
          general_config)

    # Add softmax back
    for i in range(general_config.nhops):
        memory[i].mod_query.add(Softmax())

    # Restore old settings
    general_config.nepochs = nepochs2
    general_config.lrate_decay_step = lrate_decay_step2
    train_config["init_lrate"] = init_lrate2

    # Train with old settings
    train(train_story, train_questions, train_qstory, memory, model, loss,
          general_config)
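
The pop/add pair above implements the paper's "linear start" (LS): the softmax over memory scores is removed for the first training phase, so the attention weights are the raw inner products, and the softmax is restored for the second phase. A minimal standalone NumPy sketch of the difference (illustrative only, not the repo's code):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

# Scores of one query against three memory slots
scores = np.array([0.1, 0.2, 0.15], dtype=np.float32)

p_linear = scores            # LS phase: attention is the raw (linear) scores
p_softmax = softmax(scores)  # normal phase: softmax-normalized attention

print(p_linear, p_softmax)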
Example 2
    def init_query_module(self):
        # Embedding A: embeds the memory content used for addressing
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)

        # Two parallel branches: embed the memory, pass the controller state through
        p = Parallel()
        p.add(self.emb_query)
        p.add(Identity())

        # Score every memory slot against the controller state, then normalize
        self.mod_query = Sequential()
        self.mod_query.add(p)
        self.mod_query.add(MatVecProd(True))
        self.mod_query.add(Softmax())
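
In plain NumPy terms, this bag-of-words query module scores each memory slot by an inner product with the controller state and normalizes the scores with a softmax. A rough sketch, under the assumption that MatVecProd(True) performs a (transposed) batched matrix-vector product; shapes and names below are illustrative, not the repo's API:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

M = np.random.rand(5, 20).astype(np.float32)  # 5 embedded memory slots, dim 20
u = np.random.rand(20).astype(np.float32)     # controller / query state

scores = M @ u       # the role MatVecProd(True) is assumed to play
p = softmax(scores)  # what the final Softmax produces: weights over the 5 slots
print(p, p.sum())    # p.sum() ~ 1.0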
Example 3
    def init_query_module(self):
        # Embedding A with position encoding (PE): embed each word, weight it by
        # its position, and sum over word positions
        self.emb_query = LookupTable(self.voc_sz, self.in_dim)
        s = Sequential()
        s.add(self.emb_query)
        s.add(ElemMult(self.config["weight"]))  # fixed PE weights
        s.add(Sum(dim=1))

        # Two parallel branches: PE memory embedding, controller state passed through
        p = Parallel()
        p.add(s)
        p.add(Identity())

        # Score every memory slot against the controller state, then normalize
        self.mod_query = Sequential()
        self.mod_query.add(p)
        self.mod_query.add(MatVecProd(True))
        self.mod_query.add(Softmax())
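
The only difference from Example 2 is the position-encoding branch: each word embedding is multiplied elementwise by a fixed position-dependent weight and the results are summed over word positions (ElemMult followed by Sum(dim=1)). A standalone sketch of that branch with illustrative shapes (the real weight is train_config["weight"] from Example 5):

import numpy as np

max_words, in_dim = 7, 20
emb = np.random.rand(max_words, in_dim).astype(np.float32)     # per-word embeddings
weight = np.random.rand(max_words, in_dim).astype(np.float32)  # fixed PE weights

# ElemMult then Sum over the word axis: one position-weighted vector per sentence
sentence_vec = (emb * weight).sum(axis=0)
print(sentence_vec.shape)  # (20,)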
Example 4
def train_linear_start(train_story, train_questions, train_qstory, memory,
                       model, loss, general_config):
    """
    Run Memory Network Training with Linear Start.

    :param train_story: Tensor of Stories (Shape: (SENTENCE_SIZE, STORY_SIZE, NUM_STORIES))
    :param train_questions: Tensor of Questions (Shape: (14 (see parser.py), NUM_SAMPLES))
    :param train_qstory: Tensor of Q Indices within story (Shape: (SENTENCE_SIZE, NUM_SAMPLES))
    :param memory: Memory (Story) Network
    :param model: Sequential Query Network
    :param loss: Loss Network
    :param general_config: bAbI Configuration
    """
    train_config = general_config.train_config

    # Remove softmax from memory
    for i in range(general_config.nhops):
        memory[i].mod_query.modules.pop()

    # Save settings
    nepochs2 = general_config.nepochs
    lrate_decay_step2 = general_config.lrate_decay_step
    init_lrate2 = train_config["init_lrate"]

    # Add new settings
    general_config.nepochs = general_config.ls_nepochs
    general_config.lrate_decay_step = general_config.ls_lrate_decay_step
    train_config["init_lrate"] = general_config.ls_init_lrate

    # Train with new settings
    train(train_story, train_questions, train_qstory, memory, model, loss,
          general_config)

    # Add softmax back
    for i in range(general_config.nhops):
        memory[i].mod_query.add(Softmax())

    # Restore old settings
    general_config.nepochs = nepochs2
    general_config.lrate_decay_step = lrate_decay_step2
    train_config["init_lrate"] = init_lrate2

    # Train with old settings
    train(train_story, train_questions, train_qstory, memory, model, loss,
          general_config)
Example 5
def build_model(general_config):
    """
  Build model

  NOTE: (for default config)
  1) Model's architecture (embedding B)
    LookupTable -> ElemMult -> Sum -> [ Duplicate -> { Parallel -> Memory -> Identity } -> AddTable ] -> LinearNB -> Softmax

  2) Memory's architecture
    a) Query module (embedding A)
      Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd -> Softmax

    b) Output module (embedding C)
      Parallel -> { LookupTable + ElemMult + Sum } -> Identity -> MatVecProd
  """
    train_config = general_config.train_config
    dictionary = general_config.dictionary
    use_bow = general_config.use_bow
    nhops = general_config.nhops
    add_proj = general_config.add_proj
    share_type = general_config.share_type
    enable_time = general_config.enable_time
    add_nonlin = general_config.add_nonlin

    in_dim = train_config["in_dim"]
    out_dim = train_config["out_dim"]
    max_words = train_config["max_words"]
    voc_sz = train_config["voc_sz"]

    if not use_bow:
        print('We use PE')
        train_config["weight"] = np.ones((in_dim, max_words), np.float32)
        for i in range(in_dim):
            for j in range(max_words):
                train_config["weight"][i][j] = (i + 1 - (in_dim + 1) / 2) * (
                    j + 1 - (max_words + 1) / 2)
        train_config["weight"] = 1 + 4 * train_config["weight"] / (in_dim *
                                                                   max_words)

    memory = {}
    model = Sequential()
    model.add(LookupTable(voc_sz, in_dim))
    if not use_bow:
        if enable_time:
            print('We use TE')
            model.add(ElemMult(train_config["weight"][:, :-1]))
        else:
            model.add(ElemMult(train_config["weight"]))

    model.add(Sum(dim=1))

    proj = {}
    for i in range(nhops):
        if use_bow:
            memory[i] = MemoryBoW(train_config)
        else:
            memory[i] = MemoryL(train_config)

        # Override nil_word, which the memory constructor initializes to train_config["voc_sz"]
        memory[i].nil_word = dictionary['nil']
        model.add(Duplicate())
        p = Parallel()
        p.add(memory[i])

        if add_proj:
            print('We add a linear layer between internal states')
            proj[i] = LinearNB(in_dim, in_dim)
            p.add(proj[i])
        else:
            p.add(Identity())

        model.add(p)
        model.add(AddTable())
        if add_nonlin:
            print('We apply a non-linearity (ReLU) to the internal states')
            model.add(ReLU())

    model.add(LinearNB(out_dim, voc_sz, True))
    model.add(Softmax())

    # Share weights
    if share_type == 1:
        # Type 1: adjacent weight tying
        print('We use adjacent weight tying')
        memory[0].emb_query.share(model.modules[0])
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[i - 1].emb_out)

        model.modules[-2].share(memory[len(memory) - 1].emb_out)

    elif share_type == 2:
        # Type 2: layer-wise weight tying
        print('We use layer-wise weight tying (RNN-style)')
        for i in range(1, nhops):
            memory[i].emb_query.share(memory[0].emb_query)
            memory[i].emb_out.share(memory[0].emb_out)

    if add_proj:
        for i in range(1, nhops):
            proj[i].share(proj[0])

    # Cost
    loss = CrossEntropyLoss()
    loss.size_average = False
    loss.do_softmax_bprop = True
    model.modules[-1].skip_bprop = True

    return memory, model, loss
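
The nested position-encoding loop in build_model can be checked against a one-line vectorized form; the following standalone snippet reproduces the loop above and verifies that an outer-product formulation gives the same weight matrix:

import numpy as np

in_dim, max_words = 20, 7

# Loop version, exactly as in build_model
w_loop = np.ones((in_dim, max_words), np.float32)
for i in range(in_dim):
    for j in range(max_words):
        w_loop[i][j] = (i + 1 - (in_dim + 1) / 2) * (j + 1 - (max_words + 1) / 2)
w_loop = 1 + 4 * w_loop / (in_dim * max_words)

# Vectorized version via an outer product
rows = np.arange(1, in_dim + 1, dtype=np.float32) - (in_dim + 1) / 2
cols = np.arange(1, max_words + 1, dtype=np.float32) - (max_words + 1) / 2
w_vec = 1 + 4 * np.outer(rows, cols) / (in_dim * max_words)

print(np.allclose(w_loop, w_vec))  # True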
Example 6
if TEST0:
    tests[0] = True
    # Round trip: values must survive NumPy -> torch -> NumPy unchanged
    a1 = np.random.rand(2).astype('f')
    b = torch.from_numpy(a1).type(FloatTensor)
    try:
        a2 = b.data.numpy()
        assert np.array_equal(a1, a2)
    except AssertionError:
        tests[0] = False

if TEST1:
    tests[1] = True
    for i in range(10):
        M = np.random.rand(224, 32)

        input_data = M
        input_data_torch = torch.from_numpy(M).type(FloatTensor)
        sfmx = Softmax()
        sfmx_torch = nn.Softmax(dim=0)
        result_1 = sfmx.fprop(input_data)
        result_2 = sfmx_torch(input_data_torch)
        try:
            result_2_np = result_2.data.numpy()
            assert np.allclose(result_1, result_2_np)
        except AssertionError:
            tests[1] = False

if TEST2:
    tests[2] = True
    for i in range(10):
        M = np.random.rand(*matrix_batch_dim)
        V = np.random.rand(*vect_batch_dim)
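
Example 6 is truncated here, but the testing pattern is clear: run the custom layer and a PyTorch reference on the same data and compare with np.allclose. Judging from the names matrix_batch_dim and vect_batch_dim, TEST2 presumably exercises the batched matrix-vector product; a self-contained toy version of that comparison pattern (my own sketch, not the repo's TEST2):

import numpy as np
import torch

M = np.random.rand(4, 6, 3).astype(np.float32)  # batch of 4 matrices, each 6x3
V = np.random.rand(4, 3).astype(np.float32)     # batch of 4 vectors, dim 3

out_np = np.einsum('bij,bj->bi', M, V)  # batched matrix-vector product in NumPy
out_torch = torch.bmm(torch.from_numpy(M),              # (4, 6, 3)
                      torch.from_numpy(V).unsqueeze(2)  # (4, 3, 1)
                      ).squeeze(2)                      # -> (4, 6)

assert np.allclose(out_np, out_torch.numpy(), atol=1e-6)
print('batched matvec matches the torch reference')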
Example 7
def train_linear_start(train_story,
                       train_questions,
                       train_qstory,
                       memory,
                       model,
                       loss,
                       general_config,
                       log_path='./'):

    train_config = general_config.train_config

    # Remove softmax from memory
    for i in range(general_config.nhops):
        memory[i].mod_query.modules.pop()

    # Save settings
    nepochs2 = general_config.nepochs
    lrate_decay_step2 = general_config.lrate_decay_step
    init_lrate2 = train_config["init_lrate"]

    # Add new settings
    general_config.nepochs = general_config.ls_nepochs
    general_config.lrate_decay_step = general_config.ls_lrate_decay_step
    train_config["init_lrate"] = general_config.ls_init_lrate
    print('Switching to linear-start lr config, nepochs: %d, lr: %f, decay: %f' %
          (general_config.nepochs, train_config["init_lrate"], general_config.lrate_decay_step))
    sys.stdout.flush()

    # Declare loggers
    train_logger = open(os.path.join(log_path, 'train.log'), 'w')
    train_logger.write('epoch batch_iter lr loss err\n')
    train_logger.flush()
    val_logger = open(os.path.join(log_path, 'val.log'), 'w')
    val_logger.write('epoch batch_iter lr loss err\n')
    val_logger.flush()

    # Train with new settings
    global_batch_iter = 0
    best_val_loss = 1000000.
    best_val_err = 1000000.
    train_logger, val_logger, best_model, best_memory, global_batch_iter, best_val_loss, best_val_err = \
      train(train_story,
            train_questions,
            train_qstory,
            memory,
            model,
            loss,
            general_config,
            train_logger,
            val_logger,
            global_batch_iter,
            best_val_loss,
            best_val_err)

    # Per the paper's linear-start schedule: once the validation loss stops
    # decreasing, the softmax layers are re-inserted and training recommences.
    # Add softmax back
    for i in range(general_config.nhops):
        memory[i].mod_query.add(Softmax())

    # Restore old settings
    general_config.nepochs = nepochs2
    general_config.lrate_decay_step = lrate_decay_step2
    train_config["init_lrate"] = init_lrate2
    print('Restoring original lr config, nepochs: %d, lr: %f, decay: %f' %
          (general_config.nepochs, train_config["init_lrate"], general_config.lrate_decay_step))
    sys.stdout.flush()

    # Train with old settings
    train_logger, val_logger, best_model, best_memory, _, _, _ = \
      train(train_story,
            train_questions,
            train_qstory,
            memory,
            model,
            loss,
            general_config,
            train_logger,
            val_logger,
            global_batch_iter,
            best_val_loss,
            best_val_err)

    train_logger.close()
    val_logger.close()

    return best_model, best_memory
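
Stripped of the model specifics, Example 7 is a save/override/train, then restore/train control flow on a mutable config object. A toy, runnable skeleton of just that pattern (all names hypothetical, not the repo's API):

class Config:
    # Hypothetical stand-in for general_config
    nepochs = 60
    ls_nepochs = 20

def train_phase(config, label):
    print('%s: training for %d epochs' % (label, config.nepochs))

def two_phase(config):
    saved_nepochs = config.nepochs       # save settings
    config.nepochs = config.ls_nepochs   # override with linear-start settings
    train_phase(config, 'linear start')  # phase 1 (softmax removed in the real code)
    config.nepochs = saved_nepochs       # restore settings
    train_phase(config, 'full model')    # phase 2 (softmax re-inserted)

two_phase(Config())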