Code Example #1
def train(print_loss_total,
          print_act_total,
          print_grad_total,
          input_tensor,
          target_tensor,
          bs_tensor,
          db_tensor,
          name=None):
    # create an empty matrix with padding tokens
    input_tensor, input_lengths = util.padSequence(input_tensor)
    target_tensor, target_lengths = util.padSequence(target_tensor)
    bs_tensor = torch.tensor(bs_tensor, dtype=torch.float, device=device)
    db_tensor = torch.tensor(db_tensor, dtype=torch.float, device=device)

    loss, loss_acts, grad = model.train(input_tensor, input_lengths,
                                        target_tensor, target_lengths,
                                        db_tensor, bs_tensor, name)

    #print(loss, loss_acts)
    print_loss_total += loss
    print_act_total += loss_acts
    print_grad_total += grad

    model.global_step += 1
    model.sup_loss = torch.zeros(1)

    return print_loss_total, print_act_total, print_grad_total
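
The util.padSequence helper used above is not shown; below is a minimal, self-contained sketch of what such a batch-padding helper might look like (the padding-token id of 0 and the batch-first layout are assumptions, not the repository's actual implementation):

import torch

def pad_sequence_sketch(tensors, pad_id=0):
    # pad a list of 1-D LongTensors to the length of the longest one
    lengths = [t.size(0) for t in tensors]
    padded = torch.full((len(tensors), max(lengths)), pad_id, dtype=torch.long)
    for i, t in enumerate(tensors):
        padded[i, :t.size(0)] = t
    return padded, lengths

batch = [torch.LongTensor([5, 6, 7]), torch.LongTensor([8, 9])]
padded, lengths = pad_sequence_sketch(batch)  # padded.shape == (2, 3), lengths == [3, 2]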
Code Example #2
def predict(model, prev_state, prev_active_domain, state, dic):
    start_time = time.time()
    model.beam_search = False
    input_tensor = []
    bs_tensor = []
    db_tensor = []

    usr = state['history'][-1][-1]

    prev_state = deepcopy(prev_state['belief_state'])
    state = deepcopy(state['belief_state'])

    mark_not_mentioned(prev_state)
    mark_not_mentioned(state)

    words = usr.split()
    usr = delexicalize.delexicalise(' '.join(words), dic)

    # parsing reference number GIVEN belief state
    usr = delexicaliseReferenceNumber(usr, state)

    # replace every digit sequence with the [value_count] placeholder
    digitpat = re.compile(r'\d+')
    usr = digitpat.sub('[value_count]', usr)
    # dialogue = fixDelex(dialogue_name, dialogue, data2, idx, idx_acts)

    # add database pointer
    pointer_vector, top_results, num_results = addDBPointer(state)
    # add booking pointer
    pointer_vector = addBookingPointer(state, pointer_vector)
    belief_summary = get_summary_bstate(state)

    tensor = [
        model.input_word2index(word)
        for word in normalize(usr).strip(' ').split(' ')
    ] + [util.EOS_token]
    input_tensor.append(torch.LongTensor(tensor))
    bs_tensor.append(belief_summary)  # belief state summary vector
    db_tensor.append(pointer_vector)  # db results and booking pointer
    # bs_tensor.append([0.] * 94) #
    # db_tensor.append([0.] * 30) # db results and booking
    # create an empty matrix with padding tokens
    input_tensor, input_lengths = util.padSequence(input_tensor)
    bs_tensor = torch.tensor(bs_tensor, dtype=torch.float, device=device)
    db_tensor = torch.tensor(db_tensor, dtype=torch.float, device=device)

    # at inference time the input sequence is passed in place of the target
    output_words, loss_sentence = model.predict(input_tensor, input_lengths,
                                                input_tensor, input_lengths,
                                                db_tensor, bs_tensor)
    active_domain = get_active_domain(prev_active_domain, prev_state, state)
    if active_domain is not None and active_domain in num_results:
        num_results = num_results[active_domain]
    else:
        num_results = 0
    if active_domain is not None and active_domain in top_results:
        top_results = {active_domain: top_results[active_domain]}
    else:
        top_results = {}
    response = populate_template(output_words[0], top_results, num_results,
                                 state)
    return response, active_domain
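
The digit delexicalisation performed in predict() can be tried in isolation; here is a short, self-contained sketch of that single step (the example utterance is illustrative only):

import re

digitpat = re.compile(r'\d+')

def delexicalise_counts(utterance):
    # replace every run of digits with the [value_count] placeholder
    return digitpat.sub('[value_count]', utterance)

print(delexicalise_counts('i need 2 tickets for 18:30'))
# -> i need [value_count] tickets for [value_count]:[value_count]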
Code Example #3
File: test.py  Project: kiseliu/NeuralPipeline_DSTC8
def decode(num=1):
    model, val_dials, test_dials = loadModelAndData(num)

    start_time = time.time()
    for ii in range(1):  # only the greedy pass runs; use range(2) to also decode with beam search
        if ii == 0:
            print(50 * '-' + 'GREEDY')
            model.beam_search = False
        else:
            print(50 * '-' + 'BEAM')
            model.beam_search = True

        # VALIDATION
        val_dials_gen = {}
        valid_loss = 0
        for name, val_file in val_dials.items():
            input_tensor = []
            target_tensor = []
            bs_tensor = []
            db_tensor = []
            input_tensor, target_tensor, bs_tensor, db_tensor = util.loadDialogue(
                model, val_file, input_tensor, target_tensor, bs_tensor,
                db_tensor)
            # create an empty matrix with padding tokens
            input_tensor, input_lengths = util.padSequence(input_tensor)
            target_tensor, target_lengths = util.padSequence(target_tensor)
            bs_tensor = torch.tensor(bs_tensor,
                                     dtype=torch.float,
                                     device=device)
            db_tensor = torch.tensor(db_tensor,
                                     dtype=torch.float,
                                     device=device)

            pprint(input_tensor)
            pprint(target_tensor)
            print(bs_tensor)
            print(db_tensor)
            # output_words, loss_sentence = model.predict(input_tensor, input_lengths, target_tensor, target_lengths,
            #                                             db_tensor, bs_tensor)
            output_words, loss_sentence = model.predict(
                input_tensor, input_lengths, input_tensor, input_lengths,
                db_tensor, bs_tensor)
            print(output_words)

            valid_loss += 0  # loss is not accumulated during decoding
            val_dials_gen[name] = output_words

        print('Current VALID LOSS:', valid_loss)
        with open(args.valid_output + 'val_dials_gen.json', 'w') as outfile:
            json.dump(val_dials_gen, outfile)
        evaluateModel(val_dials_gen, val_dials, mode='valid')

        # TESTING
        test_dials_gen = {}
        test_loss = 0
        for name, test_file in test_dials.items():
            input_tensor = []
            target_tensor = []
            bs_tensor = []
            db_tensor = []
            input_tensor, target_tensor, bs_tensor, db_tensor = util.loadDialogue(
                model, test_file, input_tensor, target_tensor, bs_tensor,
                db_tensor)
            # create an empty matrix with padding tokens
            input_tensor, input_lengths = util.padSequence(input_tensor)
            target_tensor, target_lengths = util.padSequence(target_tensor)
            bs_tensor = torch.tensor(bs_tensor,
                                     dtype=torch.float,
                                     device=device)
            db_tensor = torch.tensor(db_tensor,
                                     dtype=torch.float,
                                     device=device)

            output_words, loss_sentence = model.predict(
                input_tensor, input_lengths, target_tensor, target_lengths,
                db_tensor, bs_tensor)
            test_loss += 0  # loss is not accumulated during decoding
            test_dials_gen[name] = output_words

        test_loss /= len(test_dials)
        print('Current TEST LOSS:', test_loss)
        with open(args.decode_output + 'test_dials_gen.json', 'w') as outfile:
            json.dump(test_dials_gen, outfile)
        evaluateModel(test_dials_gen, test_dials, mode='test')

    print('TIME:', time.time() - start_time)
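
The decoded responses are dumped as plain JSON keyed by dialogue name; a minimal sketch of reading one of those dumps back for inspection (the path assumes args.valid_output points at the current directory):

import json

with open('val_dials_gen.json') as f:
    val_dials_gen = json.load(f)

for name, turns in list(val_dials_gen.items())[:3]:
    print(name, turns)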
Code Example #4
def trainIters(model, n_epochs=10, args=args):
    prev_min_loss, early_stop_count = 1 << 30, args.early_stop_count
    start = time.time()

    for epoch in range(1, n_epochs + 1):
        print_loss_total = 0
        print_grad_total = 0
        print_act_total = 0  # Reset every print_every
        start_time = time.time()
        # note: the optimizers are re-created at the start of every epoch
        model.optimizer = Adam(lr=args.lr_rate,
                               params=filter(lambda x: x.requires_grad,
                                             model.parameters()),
                               weight_decay=args.l2_norm)
        model.optimizer_policy = Adam(lr=args.lr_rate,
                                      params=filter(lambda x: x.requires_grad,
                                                    model.policy.parameters()),
                                      weight_decay=args.l2_norm)

        dials = list(train_dials.keys())
        random.shuffle(dials)
        input_tensor = []
        target_tensor = []
        bs_tensor = []
        db_tensor = []
        for name in dials:
            val_file = train_dials[name]
            model.optimizer.zero_grad()
            model.optimizer_policy.zero_grad()

            input_tensor, target_tensor, bs_tensor, db_tensor = util.loadDialogue(
                model, val_file, input_tensor, target_tensor, bs_tensor,
                db_tensor)

            if len(db_tensor) > args.batch_size:
                print_loss_total, print_act_total, print_grad_total = train(
                    print_loss_total, print_act_total, print_grad_total,
                    input_tensor, target_tensor, bs_tensor, db_tensor)
                input_tensor = []
                target_tensor = []
                bs_tensor = []
                db_tensor = []

        print_loss_avg = print_loss_total / len(train_dials)
        print_act_total_avg = print_act_total / len(train_dials)
        print_grad_avg = print_grad_total / len(train_dials)
        print('TIME:', time.time() - start_time)
        print('Time since %s (Epoch:%d %d%%) Loss: %.4f, Loss act: %.4f, Grad: %.4f' %
              (util.timeSince(start, epoch / n_epochs), epoch,
               epoch / n_epochs * 100, print_loss_avg, print_act_total_avg,
               print_grad_avg))

        # VALIDATION
        valid_loss = 0
        for name, val_file in val_dials.items():
            input_tensor = []
            target_tensor = []
            bs_tensor = []
            db_tensor = []
            input_tensor, target_tensor, bs_tensor, db_tensor = util.loadDialogue(
                model, val_file, input_tensor, target_tensor, bs_tensor,
                db_tensor)
            # create an empty matrix with padding tokens
            input_tensor, input_lengths = util.padSequence(input_tensor)
            target_tensor, target_lengths = util.padSequence(target_tensor)
            bs_tensor = torch.tensor(bs_tensor,
                                     dtype=torch.float,
                                     device=device)
            db_tensor = torch.tensor(db_tensor,
                                     dtype=torch.float,
                                     device=device)

            proba, _, _ = model.forward(input_tensor, input_lengths,
                                        target_tensor, target_lengths,
                                        db_tensor, bs_tensor)
            proba = proba.view(-1, model.vocab_size)  # flatten all predictions
            loss = model.gen_criterion(proba, target_tensor.view(-1))
            valid_loss += loss.item()

        valid_loss /= len(val_dials)
        print('Current Valid LOSS:', valid_loss)

        model.saveModel(epoch)
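
The validation loop above flattens the decoder output to (batch * time, vocab_size) before applying the generation criterion; a small, self-contained illustration of that pattern follows (the vocabulary size is arbitrary and CrossEntropyLoss stands in for the repository's gen_criterion, which may instead be an NLLLoss over log-probabilities with padding masked out):

import torch
import torch.nn as nn

vocab_size = 400                                # illustrative value
criterion = nn.CrossEntropyLoss()               # stand-in for model.gen_criterion

proba = torch.randn(5, 7, vocab_size)           # (batch, time, vocab) decoder scores
target = torch.randint(0, vocab_size, (5, 7))   # (batch, time) gold token ids

loss = criterion(proba.view(-1, vocab_size), target.view(-1))
print(loss.item())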