Example #1
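# NOTE: these excerpts assume imports along the following lines; `cfg`,
# `datar`, `Model`, `Optim`, `save_model`, `load_model`, `rebuild_dir`,
# `print_sent_dec`, etc. are project-local helpers that are not shown here.
import os
import re
import time
import pickle
import random
import shutil

import numpy as np
import torch
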
def predict(model, modules, consts, options):
    print "start predicting,"
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print "using beam search"
    else:
        print "using greedy search"
    rebuild_dir(cfg.cc.BEAM_SUMM_PATH)
    rebuild_dir(cfg.cc.BEAM_GT_PATH)
    rebuild_dir(cfg.cc.GROUND_TRUTH_PATH)
    rebuild_dir(cfg.cc.SUMM_PATH)

    print "loading test set..."
    xy_list = pickle.load(open(cfg.cc.TESTING_DATA_PATH + "ibm.pkl", "r"))
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options,
                                                       consts)

    print "num_files = ", num_files, ", num_batches = ", num_batches

    running_start = time.time()
    partial_num = 0
    total_num = 0
    si = 0
    for idx_batch in range(num_batches):
        test_idx = batch_list[idx_batch]
        batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
        batch = datar.get_data(batch_raw, modules, consts, options)

        x, len_x, x_mask, y, len_y, y_mask, oy, x_ext, y_ext, oovs = sort_samples(batch.x, batch.len_x, \
                                                             batch.x_mask, batch.y, batch.len_y, batch.y_mask, \
                                                             batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words)

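        # encode the whole batch once; judging by the indexing below, word_emb
        # is (seq_len, batch, hidden) and dec_state is (batch, hidden)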
        word_emb, dec_state = model.encode(
            torch.LongTensor(x).cuda(),
            torch.LongTensor(len_x).cuda(),
            torch.FloatTensor(x_mask).cuda())

        if options["beam_decoding"]:
            for idx_s in range(word_emb.size(1)):
                inputx = (torch.LongTensor(x_ext[:, idx_s]).cuda(), word_emb[:, idx_s, :], dec_state[idx_s, :],\
                          torch.FloatTensor(x_mask[:, idx_s, :]).cuda(), y[:, idx_s], [len_y[idx_s]], oy[idx_s],\
                          batch.max_ext_len, oovs[idx_s])
                beam_decode_copy(si, inputx, model, modules, consts, options)
                si += 1
        else:
            inputx = (word_emb, dec_state, torch.FloatTensor(x_mask).cuda(), y,
                      len_y)
            greedy_decode(test_idx, inputx, model, modules, consts, options)

        testing_batch_size = len(test_idx)
        partial_num += testing_batch_size
        total_num += testing_batch_size
        if partial_num >= consts["testing_print_size"]:
            print(total_num, "summs are generated")
            partial_num = 0
    print(si, total_num)
Example #2
def run(existing_model_name=None, w=64):
    modules, consts, options = init_modules()

    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        need_load_model = True
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print("loading train set...")
        if options["is_debugging"]:
            xy_list = pickle.load(
                open(cfg.cc.TESTING_DATA_PATH + "test.pkl", "rb"))
        else:
            xy_list = pickle.load(
                open(cfg.cc.TRAINING_DATA_PATH + "train.pkl", "rb"))
        batch_list, num_files, num_batches = datar.batched(
            len(xy_list), options, consts)
        print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()
    if True:  #TODO: refactor
        print("compiling model ...")
        model = Model(modules, consts, options)
        if options["cuda"]:
            model.cuda()
        optimizer = torch.optim.Adagrad(model.parameters(),
                                        lr=consts["lr"],
                                        initial_accumulator_value=0.1)

        model_name = "".join(["cnndm.s2s.", options["cell"]])
        existing_epoch = 0
        if need_load_model:
            if existing_model_name is None:
                existing_model_name = "cnndm.s2s.transformer.gpu0.epoch27.2"
            print("loading existing model:", existing_model_name)
            model, optimizer = load_model(
                cfg.cc.MODEL_PATH + existing_model_name, model, optimizer)

        if training_model:
            print("start training model ")
            model.train()
            print_size = (num_files // consts["print_time"]
                          if num_files >= consts["print_time"] else num_files)

            last_total_error = float("inf")
            print("max epoch:", consts["max_epoch"])
            for epoch in range(0, consts["max_epoch"]):
                print("epoch: ", epoch + existing_epoch)
                num_partial = 1
                total_error = 0.0
                error_c = 0.0
                partial_num_files = 0
                epoch_start = time.time()
                partial_start = time.time()
                # shuffle the trainset
                batch_list, num_files, num_batches = datar.batched(
                    len(xy_list), options, consts)
                used_batch = 0.
                for idx_batch in range(num_batches):
                    train_idx = batch_list[idx_batch]
                    batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                    if len(batch_raw) != consts["batch_size"]:
                        continue
                    local_batch_size = len(batch_raw)
                    batch = datar.get_data(batch_raw, modules, consts, options)

                    # print(batch.x.shape,attention_mask.shape,batch.y.shape,batch.y_ext.shape)
                    model.zero_grad()

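                    # forward pass: y_inp is the shifted decoder input (teacher
                    # forcing); x_ext/y_ext index the extended copy vocabulary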
                    y_pred, cost = model(torch.LongTensor(batch.x).to(options["device"]),\
                                   torch.LongTensor(batch.y_inp).to(options["device"]),\
                                   torch.LongTensor(batch.y).to(options["device"]),\
                                   torch.FloatTensor(batch.x_mask).to(options["device"]),\
                                   torch.FloatTensor(batch.y_mask).to(options["device"]),\
                                   torch.LongTensor(batch.x_ext).to(options["device"]),\
                                   torch.LongTensor(batch.y_ext).to(options["device"]),\
                                   batch.max_ext_len)

                    cost.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   consts["norm_clip"])
                    optimizer.step()

                    cost = cost.item()
                    total_error += cost
                    used_batch += 1
                    partial_num_files += consts["batch_size"]
                    if partial_num_files // print_size == 1 and idx_batch < num_batches:
                        print(idx_batch + 1, "/", num_batches, "batches have been processed,",
                              "average cost until now:", "cost =", total_error / used_batch, ",",
                              "cost_c =", error_c / used_batch, ",",
                              "time:", time.time() - partial_start)
                        partial_num_files = 0
                        # if not options["is_debugging"]:
                        #     print("save model... ",)
                        #     file_name =  model_name + ".gpu" + str(consts["idx_gpu"]) + ".epoch" + str(epoch // consts["save_epoch"] + existing_epoch) + "." + str(num_partial)

                        #     save_model(cfg.cc.MODEL_PATH + file_name, model, optimizer)
                        #     if options["fire"]:
                        #         shutil.move(cfg.cc.MODEL_PATH + file_name, "/out/")

                        # print("finished")
                        num_partial += 1
                print ("in this epoch, total average cost =", total_error / used_batch, ",", \
                        "cost_c =", error_c / used_batch, ",",\
                        "time:", time.time() - epoch_start)

                print_sent_dec(y_pred, batch.y, batch.y_mask,
                               batch.x_ext_words, modules, consts, options,
                               local_batch_size)

                if last_total_error > total_error or options["is_debugging"]:
                    last_total_error = total_error
                    if not options["is_debugging"]:
                        print("save model... ", )
                        # file_name =  model_name + ".gpu" + str(consts["idx_gpu"]) + ".epoch" + str(epoch // consts["save_epoch"] + existing_epoch) + "." + str(num_partial)
                        file_name = model_name + ".gpu" + str(
                            consts["idx_gpu"]) + ".best_model_baseline"

                        save_model(cfg.cc.MODEL_PATH + file_name, model,
                                   optimizer)
                        if options["fire"]:
                            shutil.move(cfg.cc.MODEL_PATH + file_name, "/out/")

                        print("finished")
                else:
                    print("optimization finished")
                    break

            print("save final model... "),
            file_name = model_name + ".final.gpu" + str(
                consts["idx_gpu"]) + ".epoch" + str(
                    epoch // consts["save_epoch"] +
                    existing_epoch) + "." + str(num_partial)
            save_model(cfg.cc.MODEL_PATH + file_name, model, optimizer)
            if options["fire"]:
                shutil.move(cfg.cc.MODEL_PATH + file_name, "/out/")

            print("finished")
        else:
            print("skip training model")

        if predict_model:
            predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
Example #3
def predict(model, modules, consts, options):
    print("start predicting,")
    model.eval()
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print("using beam search")
    else:
        print("using greedy search")
    rebuild_dir(cfg.cc.BEAM_SUMM_PATH)
    rebuild_dir(cfg.cc.BEAM_GT_PATH)
    rebuild_dir(cfg.cc.GROUND_TRUTH_PATH)
    rebuild_dir(cfg.cc.SUMM_PATH)

    print("loading test set...")
    if options["model_selection"]:
        xy_list = pickle.load(
            open(cfg.cc.VALIDATE_DATA_PATH + "pj1000.pkl", "rb"))
    else:
        xy_list = pickle.load(open(cfg.cc.TESTING_DATA_PATH + "test.pkl",
                                   "rb"))
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options,
                                                       consts)

    print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()
    partial_num = 0
    total_num = 0
    si = 0
    for idx_batch in range(num_batches):
        test_idx = batch_list[idx_batch]
        batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
        batch = datar.get_data(batch_raw, modules, consts, options)

        assert len(test_idx) == batch.x.shape[1]  # local_batch_size

        word_emb, padding_mask = model.encode(
            torch.LongTensor(batch.x).to(options["device"]))

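        # beam search decodes one sample at a time; si numbers the output files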
        if options["beam_decoding"]:
            for idx_s in range(len(test_idx)):
                if options["copy"]:
                    inputx = (torch.LongTensor(batch.x_ext[:, idx_s]).to(options["device"]), \
                            torch.FloatTensor(batch.x_mask[:, idx_s, :]).to(options["device"]), \
                        word_emb[:, idx_s, :], \
                        padding_mask[:, idx_s],\
                        batch.y[:, idx_s], [batch.len_y[idx_s]], batch.original_summarys[idx_s],\
                        batch.max_ext_len, batch.x_ext_words[idx_s])
                else:
                    inputx = (torch.LongTensor(batch.x[:, idx_s]).to(options["device"]), word_emb[:, idx_s, :], padding_mask[:, idx_s],\
                              batch.y[:, idx_s], [batch.len_y[idx_s]], batch.original_summarys[idx_s])
                beam_decode(si, inputx, model, modules, consts, options)
                si += 1
        else:
            pass
            #greedy_decode()

        testing_batch_size = len(test_idx)
        partial_num += testing_batch_size
        total_num += testing_batch_size
        if partial_num >= consts["testing_print_size"]:
            print(total_num, "summs are generated")
            partial_num = 0
    print(si, total_num)
Example #4
def run(existing_model_name=None):
    modules, consts, options = init_modules()

    #use_gpu(consts["idx_gpu"])
    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        need_load_model = False
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print "loading train set..."
        if options["is_debugging"]:
            xy_list = pickle.load(
                open(cfg.cc.TESTING_DATA_PATH + "test.pkl", "r"))
        else:
            xy_list = pickle.load(
                open(cfg.cc.TRAINING_DATA_PATH + "train.pkl", "r"))
        batch_list, num_files, num_batches = datar.batched(
            len(xy_list), options, consts)
        print "num_files = ", num_files, ", num_batches = ", num_batches

    running_start = time.time()
    if True:  #TODO: refactor
        print "compiling model ..."
        model = Model(modules, consts, options)
        #criterion = nn.NLLLoss(ignore_index=consts["pad_token_idx"])
        if options["cuda"]:
            model.cuda()
            #criterion.cuda()
            #model = nn.DataParallel(model)
        optimizer = torch.optim.Adagrad(model.parameters(),
                                        lr=consts["lr"],
                                        initial_accumulator_value=0.1)

        model_name = "cnndm.s2s"
        existing_epoch = 0
        if need_load_model:
            if existing_model_name is None:
                existing_model_name = "cnndm.s2s.gpu5.epoch5.5"
            print("loading existing model:", existing_model_name)
            model, optimizer = load_model(
                cfg.cc.MODEL_PATH + existing_model_name, model, optimizer)

        if training_model:
            print "start training model "
            print_size = num_files / consts[
                "print_time"] if num_files >= consts[
                    "print_time"] else num_files

            last_total_error = float("inf")
            print "max epoch:", consts["max_epoch"]
            for epoch in xrange(0, consts["max_epoch"]):
                '''
                if not options["is_debugging"] and epoch == 5:
                    consts["lr"] *= 0.1
                    #adjust
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = consts["lr"]
                '''
                print "epoch: ", epoch + existing_epoch
                num_partial = 1
                total_error = 0.0
                partial_num_files = 0
                epoch_start = time.time()
                partial_start = time.time()
                # shuffle the trainset
                batch_list, num_files, num_batches = datar.batched(
                    len(xy_list), options, consts)
                used_batch = 0.
                for idx_batch in range(num_batches):
                    train_idx = batch_list[idx_batch]
                    batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                    if len(batch_raw) != consts["batch_size"]:
                        continue
                    local_batch_size = len(batch_raw)
                    batch = datar.get_data(batch_raw, modules, consts, options)

                    x, len_x, x_mask, y, len_y, y_mask, oy, x_ext, y_ext, oovs = sort_samples(batch.x, batch.len_x, \
                                                             batch.x_mask, batch.y, batch.len_y, batch.y_mask, \
                                                             batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words)

                    model.zero_grad()
                    y_pred, cost = model(torch.LongTensor(x).cuda(), torch.LongTensor(len_x).cuda(),\
                                   torch.LongTensor(y).cuda(),  torch.FloatTensor(x_mask).cuda(), \
                                   torch.FloatTensor(y_mask).cuda(), torch.LongTensor(x_ext).cuda(), torch.LongTensor(y_ext).cuda(), \
                                   batch.max_ext_len, None)

                    cost.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                    optimizer.step()

                    cost = cost.item()
                    total_error += cost
                    used_batch += 1
                    partial_num_files += consts["batch_size"]
                    if partial_num_files // print_size == 1 and idx_batch < num_batches:
                        print(idx_batch + 1, "/", num_batches, "batches have been processed,",
                              "average cost until now:", "cost =", total_error / used_batch, ",",
                              "time:", time.time() - partial_start)
                        partial_num_files = 0
                        if not options["is_debugging"]:
                            print("save model... ", end="")
                            save_model(
                                cfg.cc.MODEL_PATH + model_name + ".gpu" +
                                str(consts["idx_gpu"]) + ".epoch" +
                                str(epoch // consts["save_epoch"] +
                                    existing_epoch) + "." + str(num_partial),
                                model, optimizer)
                            print("finished")
                        num_partial += 1
                print "in this epoch, total average cost =", total_error / used_batch, ",",
                print "time:", time.time() - epoch_start

                print_sent_dec(y_pred, y_ext, y_mask, oovs, modules, consts,
                               options, local_batch_size)

                if last_total_error > total_error or options["is_debugging"]:
                    last_total_error = total_error
                    if not options["is_debugging"]:
                        print "save model... ",
                        save_model(
                            cfg.cc.MODEL_PATH + model_name + ".gpu" +
                            str(consts["idx_gpu"]) + ".epoch" +
                            str(epoch / consts["save_epoch"] + existing_epoch)
                            + "." + str(num_partial), model, optimizer)
                        print "finished"
                else:
                    print "optimization finished"
                    break

            print "save final model... ",
            save_model(
                cfg.cc.MODEL_PATH + model_name + "final.gpu" +
                str(consts["idx_gpu"]) + ".epoch" +
                str(epoch / consts["save_epoch"] + existing_epoch) + "." +
                str(num_partial), model, optimizer)
            print "finished"
        else:
            print "skip training model"

        if predict_model:
            predict(model, modules, consts, options)
    print "Finished, time:", time.time() - running_start
Example #5
def run():

    all_losses = []
    p_points = []
    continuing = False
    modules, consts, options = init_modules()

    #use_gpu(consts["idx_gpu"])
    print_basic_info(modules, consts, options)

    if not opt.predict:
        print "loading train set..."
        if opt.debug:
            xy_list = pickle.load(
                open(cfg.cc.TRAINING_DATA_PATH + "train_small.pkl", "r"))
        else:
            xy_list = pickle.load(
                open(cfg.cc.TRAINING_DATA_PATH + "train.pkl", "r"))
        batch_list, num_files, num_batches = datar.batched(
            len(xy_list), options, consts)
        print "num_files = ", num_files, ", num_batches = ", num_batches

    running_start = time.time()
    if True:  #TODO: refactor
        print('model_path', cfg.cc.MODEL_PATH)
        continue_training = len(os.listdir(cfg.cc.MODEL_PATH)) != 0
        options['continue_training'] = continue_training
        print "compiling model ..."
        model = Model(modules, consts, options)
        if options["cuda"]:
            model.cuda()

        optimizer = torch.optim.Adagrad(model.parameters(),
                                        lr=consts["lr"],
                                        initial_accumulator_value=0.1)
        existing_epoch = 0
        if continue_training or opt.predict or opt.retrain:
            if opt.model_name == '':
                opt.model_name = list(
                    reversed(
                        sorted(
                            os.listdir(cfg.cc.MODEL_PATH),
                            key=lambda x: int(
                                re.match(r'.*step(\d+)', x).groups()[0]))))[0]
                continue_step = int(
                    re.match(r'.*step(\d+)', opt.model_name).groups()[0])
                name = cfg.cc.MODEL_PATH + opt.model_name
            else:
                continue_step = 0
                name = opt.model_name
            print "loading existed model:", name
            model, optimizer, all_losses, av_batch_losses, p_points, av_batch_p_points = load_model(
                name, model, optimizer)
        if opt.retrain:
            av_batch_losses = np.zeros(5)
            av_batch_p_points = np.zeros(1)
            all_losses = []
            p_points = []
            if options['coverage']:
                model.decoder.add_cov_weight()
                if options['cuda']:
                    model.cuda()
            print(model)
            # rebuild the optimizer, because the network now contains coverage
            # weights when coverage is on
            optimizer = torch.optim.Adagrad(model.parameters(),
                                            lr=consts["lr"],
                                            initial_accumulator_value=0.1)
        if continue_training and not opt.predict:
            continuing = True
            print('Continue training model from step {}'.format(continue_step))
        if not opt.predict:
            print "start training model "
            print_size = num_files / consts[
                "print_time"] if num_files >= consts[
                    "print_time"] else num_files
            steps = 0
            print(model)
            # cnndm.s2s.lstm.gpu0.epoch0.7
            last_total_error = float("inf")
            print "max epoch:", consts["max_epoch"]
            for epoch in xrange(0, consts["max_epoch"]):
                print "epoch: ", epoch + existing_epoch
                num_partial = 1
                if not continuing:
                    av_batch_losses = np.zeros(5)
                    av_batch_p_points = np.zeros(1)
                partial_num_files = 0
                epoch_start = time.time()
                partial_start = time.time()
                # shuffle the trainset
                batch_list, num_files, num_batches = datar.batched(
                    len(xy_list), options, consts)
                used_batch = 0.
                y_pred = None

                for idx_batch in range(num_batches):
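                    # when resuming from a checkpoint, fast-forward counters
                    # and RNG seeding past batches that were already trained on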
                    if continue_training and steps <= continue_step:
                        used_batch += 1
                        init_seeds(steps)
                        steps += 1
                        partial_num_files += consts["batch_size"]
                        if partial_num_files % print_size == 0 and idx_batch < num_batches:
                            partial_num_files = 0
                            num_partial += 1

                        if steps == continue_step:
                            continuing = False
                        continue
                    else:
                        continuing = False

                    train_idx = batch_list[idx_batch]
                    batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                    if len(batch_raw) != consts["batch_size"]:
                        continue
                    local_batch_size = len(batch_raw)
                    batch = datar.get_data(batch_raw, modules, consts, options)

                    x, len_x, x_mask, y, len_y, y_mask, oy, x_ext, y_ext, oovs = sort_samples(batch.x, batch.len_x, \
                                                             batch.x_mask, batch.y, batch.len_y, batch.y_mask, \
                                                             batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words)

                    model.zero_grad()

                    if opt.tf_schedule:
                        tf = teacher_forcing_ratio(steps,
                                                   options["tf_offset_decay"])
                    else:
                        tf = True
                    y_pred, losses, p_point = model(torch.LongTensor(x).to(options["device"]), torch.LongTensor(len_x).to(options["device"]),\
                                   torch.LongTensor(y).to(options["device"]),  torch.FloatTensor(x_mask).to(options["device"]), \
                                   torch.FloatTensor(y_mask).to(options["device"]), torch.LongTensor(x_ext).to(options["device"]),\
                                   torch.LongTensor(y_ext).to(options["device"]), \
                                   batch.max_ext_len)
                    total_loss = 0
                    # TODO: implement average batch costs
                    for loss_ in losses:
                        if loss_ is not None:
                            total_loss += loss_

                    total_loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   consts["norm_clip"])
                    optimizer.step()

                    # append total loss to losses
                    losses = np.append(total_loss.item(), losses)

                    # transform tensors to floats
                    losses = [
                        loss.cpu().detach().numpy() if isinstance(
                            loss, torch.Tensor) else loss for loss in losses
                    ]

                    # with open(opt.result_path + '/result.log', "a") as log_file:
                    #     log_file.write("epoch {}, step {}, total_loss {}, loss {}, cost_cov {}, cost_p_point {}, cost_w_prior {}\n".format(epoch, steps,*losses))

                    # if new batch reset
                    # add current losses to av_batch_losses

                    av_batch_losses = np.add(av_batch_losses, losses)
                    av_batch_p_points = np.add(av_batch_p_points, p_point)
                    used_batch += 1
                    partial_num_files += consts["batch_size"]
                    if partial_num_files % print_size == 0 and idx_batch < num_batches:
                        print("Step: {}").format(steps)

                        print idx_batch + 1, "/", num_batches, "batches have been processed,",
                        print(
                            "av_batchp_point {}, av_batch: total_loss {}, loss {}, cost_cov {}, cost_p_point {}, cost_w_prior {}"
                            .format(av_batch_p_points / used_batch,
                                    *av_batch_losses / used_batch))
                        print "time:", time.time() - partial_start
                        partial_num_files = 0
                        if not opt.debug:
                            print "save model... ",
                            save_model(
                                cfg.cc.MODEL_PATH + "model.gpu" +
                                str(consts["idx_gpu"]) + ".epoch" +
                                str(epoch) + ".step" + str(steps), model,
                                optimizer, all_losses, av_batch_losses,
                                p_points, av_batch_p_points)
                            all_losses.append(av_batch_losses / used_batch)
                            p_points.append(av_batch_p_points / used_batch)
                            print "finished"
                        num_partial += 1
                    init_seeds(steps)
                    steps += 1
                if not continuing:
                    print("in this epoch:")
                    print(
                        "av_batchp_point {}, av_batch: total_loss {}, loss {}, cost_cov {}, cost_p_point {}, cost_w_prior {}"
                        .format(av_batch_p_points / used_batch,
                                *av_batch_losses / used_batch))
                    print "time:", time.time() - epoch_start
                    if y_pred is not None:
                        print_sent_dec(y_pred, y_ext, y_mask, oovs, modules,
                                       consts, options, local_batch_size)

                        if not opt.debug:
                            print("save model... ", end="")
                            pickle.dump(
                                [all_losses, p_points],
                                open(opt.result_path + '/losses_p_points.p',
                                     'wb'))
                            save_model(
                                cfg.cc.MODEL_PATH + "model.gpu" +
                                str(consts["idx_gpu"]) + ".epoch" +
                                str(epoch) + ".step" + str(steps), model,
                                optimizer, all_losses, av_batch_losses,
                                p_points, av_batch_p_points)
                            print("finished")
            if not opt.debug:
                print("save final model... ", end="")
                save_model(
                    cfg.cc.MODEL_PATH + "model.final.gpu" +
                    str(consts["idx_gpu"]), model, optimizer, all_losses,
                    av_batch_losses, p_points, av_batch_p_points)
                pickle.dump([all_losses, p_points],
                            open(opt.result_path + '/losses_p_points.p', 'wb'))
                print("finished")
        else:
            print("skip training model")

        if opt.predict:
            predict(model, modules, consts, options)
    print "Finished, time:", time.time() - running_start
Example #6
def run(existing_model_name=None):
    modules, consts, options = init_modules()

    #use_gpu(consts["idx_gpu"])
    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        need_load_model = False
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print "loading train set..."
        if options["is_debugging"]:
            xy_list = pickle.load(open(cfg.cc.VALIDATE_DATA_PATH + "pj1000.pkl", "r")) 
        else:
            xy_list = pickle.load(open(cfg.cc.TRAINING_DATA_PATH + "train.pkl", "r")) 
        batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
        print "num_files = ", num_files, ", num_batches = ", num_batches

    running_start = time.time()
    if True: #TODO: refactor
        print "compiling model ..." 
        model = Model(modules, consts, options)
        if options["cuda"]:
            model.cuda()
        #optimizer = torch.optim.Adagrad(model.parameters(), lr=consts["lr"], initial_accumulator_value=0.1)
        optimizer = Optim(consts["hidden_size"], 1, 8000,\
                          torch.optim.Adam(model.parameters(),\
                          lr=consts["lr"], betas=(0.9, 0.998), eps=1e-9))
        

        model_name = "".join(["cnndm.s2s.", options["cell"]])
        existing_epoch = 0
        if need_load_model:
            if existing_model_name is None:
                existing_model_name = "cnndm.s2s.transformer.gpu4.epoch9.3"
            print("loading existing model:", existing_model_name)
            model, optimizer = load_model(cfg.cc.MODEL_PATH + existing_model_name, model, optimizer)

        if training_model:
            model.train()  # make sure the model is in training mode

            print("start training model ")
            print_size = (num_files // consts["print_time"]
                          if num_files >= consts["print_time"] else num_files)

            last_total_error = float("inf")
            print("max epoch:", consts["max_epoch"])
            for epoch in range(0, consts["max_epoch"]):
                print("epoch: ", epoch + existing_epoch)
                num_partial = 1
                total_error = 0.0
                error_c = 0.0
                partial_num_files = 0
                epoch_start = time.time()
                partial_start = time.time()
                # shuffle the trainset
                batch_list, num_files, num_batches = datar.batched(len(xy_list), options, consts)
                used_batch = 0.
                for idx_batch in range(num_batches):
                    train_idx = batch_list[idx_batch]
                    batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                    if len(batch_raw) != consts["batch_size"]:
                        continue
                    local_batch_size = len(batch_raw)
                    batch = datar.get_data(batch_raw, modules, consts, options)
                    
                    model.zero_grad()
                    
                    y_pred, cost, cost_c = model(torch.LongTensor(batch.x).to(options["device"]),\
                              torch.LongTensor(batch.px).to(options["device"]),\
                              torch.LongTensor(batch.pxs).to(options["device"]),\
                              torch.FloatTensor(batch.x_mask).to(options["device"]),\
                              torch.LongTensor(batch.y_inp).to(options["device"]),\
                              torch.LongTensor(batch.py).to(options["device"]),\
                              torch.LongTensor(batch.pys).to(options["device"]),\
                              torch.FloatTensor(batch.y_mask_tri).to(options["device"]),\
                              torch.LongTensor(batch.y).to(options["device"]),\
                              torch.FloatTensor(batch.y_mask).to(options["device"]),\
                              torch.LongTensor(batch.x_ext).to(options["device"]),\
                              torch.LongTensor(batch.y_ext).to(options["device"]),\
                              batch.max_ext_len)

                    if cost_c is None:
                        loss = cost
                    else:
                        loss = cost + cost_c
                        cost_c = cost_c.item()
                        error_c += cost_c
                    
                    loss.backward()
                    #torch.nn.utils.clip_grad_norm_(model.parameters(), consts["norm_clip"])
                    optimizer.step()
                    
                    cost = cost.item()
                    total_error += cost
                    used_batch += 1
                    partial_num_files += consts["batch_size"]
                    if partial_num_files // print_size == 1 and idx_batch < num_batches:
                        print(idx_batch + 1, "/", num_batches, "batches have been processed,",
                              "average cost until now:", "cost =", total_error / used_batch, ",",
                              "cost_c =", error_c / used_batch, ",",
                              "time:", time.time() - partial_start)
                        partial_num_files = 0
                        if not options["is_debugging"]:
                            print("save model... ", end="")
                            save_model(cfg.cc.MODEL_PATH + model_name + ".gpu" + str(consts["idx_gpu"]) + ".epoch" + str(epoch // consts["save_epoch"] + existing_epoch) + "." + str(num_partial), model, optimizer)
                            print("finished")
                        num_partial += 1
                print("in this epoch, total average cost =", total_error / used_batch, ",",
                      "cost_c =", error_c / used_batch, ",",
                      "time:", time.time() - epoch_start)

                print_sent_dec(y_pred, batch.y_ext, batch.y_mask, batch.x_ext_words, modules, consts, options, local_batch_size)
                
                if last_total_error > total_error or options["is_debugging"]:
                    last_total_error = total_error
                    if not options["is_debugging"]:
                        print("save model... ", end="")
                        save_model(cfg.cc.MODEL_PATH + model_name + ".gpu" + str(consts["idx_gpu"]) + ".epoch" + str(epoch // consts["save_epoch"] + existing_epoch) + "." + str(num_partial), model, optimizer)
                        print("finished")
                else:
                    print("optimization finished")
                    break

            print("save final model... ", end="")
            save_model(cfg.cc.MODEL_PATH + model_name + ".final.gpu" + str(consts["idx_gpu"]) + ".epoch" + str(epoch // consts["save_epoch"] + existing_epoch) + "." + str(num_partial), model, optimizer)
            print("finished")
        else:
            print("skip training model")

        if predict_model:
            model.eval()
            predict(model, modules, consts, options)
    print "Finished, time:", time.time() - running_start
Example #7
def predict(model, modules, consts, options):
    print "start predicting,"
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print "using beam search"
    else:
        print "using greedy search"
    rebuild_dir(cfg.cc.BEAM_SUMM_PATH)
    rebuild_dir(cfg.cc.BEAM_GT_PATH)
    rebuild_dir(cfg.cc.GROUND_TRUTH_PATH)
    rebuild_dir(cfg.cc.SUMM_PATH)

    print "loading test set..."
    if opt.debug:
        xy_list = pickle.load(
            open(cfg.cc.TESTING_DATA_PATH + "test_500.pkl", "r"))
    else:
        xy_list = pickle.load(open(cfg.cc.TESTING_DATA_PATH + "test.pkl", "r"))
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options,
                                                       consts)

    # Save order of batches for ngram overlap
    batches_sorted_idx = []

    print "num_files = ", num_files, ", num_batches = ", num_batches

    running_start = time.time()
    partial_num = 0
    total_num = 0
    si = 0
    for idx_batch in range(num_batches):
        test_idx = batch_list[idx_batch]
        batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
        batch = datar.get_data(batch_raw, modules, consts, options)

        assert len(test_idx) == batch.x.shape[1]  # local_batch_size

        x, len_x, x_mask, y, len_y, y_mask, oy, x_ext, y_ext, oovs, batch_sorted_idx = sort_samples(batch.x, batch.len_x, \
                                                             batch.x_mask, batch.y, batch.len_y, batch.y_mask, \
                                                             batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words,
                                                             return_idx=True)
        batches_sorted_idx.append(batch_sorted_idx)


        word_emb, dec_state = model.encode(torch.LongTensor(x).to(options["device"]),\
                                           torch.LongTensor(len_x).to(options["device"]),\
                                           torch.FloatTensor(x_mask).to(options["device"]))

        if options["beam_decoding"]:
            for idx_s in range(len(test_idx)):
                if options["copy"]:
                    inputx = (torch.LongTensor(x_ext[:, idx_s]).to(options["device"]), word_emb[:, idx_s, :], dec_state[idx_s, :],\
                          torch.FloatTensor(x_mask[:, idx_s, :]).to(options["device"]), y[:, idx_s], [len_y[idx_s]], oy[idx_s],\
                          batch.max_ext_len, oovs[idx_s])
                else:
                    inputx = (torch.LongTensor(x[:, idx_s]).to(options["device"]), word_emb[:, idx_s, :], dec_state[idx_s, :],\
                          torch.FloatTensor(x_mask[:, idx_s, :]).to(options["device"]), y[:, idx_s], [len_y[idx_s]], oy[idx_s])

                beam_decode(si, inputx, model, modules, consts, options)
                si += 1
        else:
            if options["copy"]:
                inputx = (torch.LongTensor(x_ext).to(options["device"]), word_emb, dec_state, \
                          torch.FloatTensor(x_mask).to(options["device"]), y, len_y, oy, batch.max_ext_len, oovs)
            else:
                inputx = (torch.LongTensor(x).to(options["device"]), word_emb,
                          dec_state, torch.FloatTensor(x_mask).to(
                              options["device"]), y, len_y, oy)
            greedy_decode(test_idx, inputx, model, modules, consts, options)
            si += len(test_idx)

        testing_batch_size = len(test_idx)
        partial_num += testing_batch_size
        total_num += testing_batch_size
        if partial_num >= consts["testing_print_size"]:
            print(total_num, "summs are generated")
            partial_num = 0
    pickle.dump(batches_sorted_idx,
                open(opt.output_dir + '/test_batch_order.pkl', 'wb'))
    print(si, total_num)
Example #8
def run(existing_model_name=None, is_predicting=0):
    modules, consts, options = init_modules(is_predicting)
    #print("value:", options["is_predicting"])
    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        need_load_model = False
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print("loading train set...")
        train_xy_list = pickle.load(
            open(cfg.cc.TRAINING_DATA_PATH + "train.pkl", "rb"))
        val_xy_list = pickle.load(
            open(cfg.cc.VALIDATE_DATA_PATH + "valid.pkl", "rb"))
        train_batch_list, train_size, n_train_batches = datar.batched(
            len(train_xy_list), options, consts)
        val_batch_list, val_size, n_val_batches = datar.batched(
            len(val_xy_list), options, consts)
        print("train size =", train_size, ", num training batches =",
              n_train_batches)
        print("val size =", val_size, ", num validation batches =",
              n_val_batches)

    running_start = time.time()
    if True:  #TODO: refactor
        print("compiling model ...")
        model = Model(modules, consts, options)
        if options["cuda"]:
            model.cuda()
        optimizer = torch.optim.Adagrad(model.parameters(),
                                        lr=consts["lr"],
                                        initial_accumulator_value=0.1)

        model_name = "".join(["s2s.", options["cell"]])
        existing_epoch = 0
        if need_load_model:
            if existing_model_name is None:
                existing_model_name = "cnndm.s2s.transformer.gpu0.epoch27.2"
            print("loading existing model:", existing_model_name)
            model, optimizer = load_model(
                cfg.cc.MODEL_PATH + existing_model_name, model, optimizer)

        if training_model:
            print("start training model ")

            #print_size = num_files // consts["print_time"] if num_files >= consts["print_time"] else num_files

            last_total_error = float("inf")
            best_val_loss = float("inf")
            print("max epoch:", consts["max_epoch"])
            for epoch in range(0, consts["max_epoch"]):
                print("epoch %s:" % (epoch + existing_epoch))
                num_partial = 1
                train_loss = 0.0
                error_c = 0.0
                partial_num_files = 0
                epoch_start = time.time()
                partial_start = time.time()
                # shuffle the trainset
                train_batch_list, train_size, n_train_batches = datar.batched(
                    len(train_xy_list), options, consts)
                n_used_train_batch = 0
                model.train()
                for idx_batch in range(n_train_batches):
                    train_idx = train_batch_list[idx_batch]
                    train_batch_raw = [
                        train_xy_list[xy_idx] for xy_idx in train_idx
                    ]
                    if len(train_batch_raw) != consts["batch_size"]:
                        continue
                    local_batch_size = len(train_batch_raw)
                    train_batch = datar.get_data(train_batch_raw, modules,
                                                 consts, options)

                    model.zero_grad()

                    y_pred, cost = model(torch.LongTensor(train_batch.x).cuda(options["device"]),\
                                   torch.LongTensor(train_batch.y_inp).cuda(options["device"]),\
                                   torch.LongTensor(train_batch.y).cuda(options["device"]),\
                                   torch.FloatTensor(train_batch.x_mask).cuda(options["device"]),\
                                   torch.FloatTensor(train_batch.y_mask).cuda(options["device"]),\
                                   torch.LongTensor(train_batch.x_ext).cuda(options["device"]),\
                                   torch.LongTensor(train_batch.y_ext).cuda(options["device"]), \
                                         train_batch.max_ext_len)

                    cost.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   consts["norm_clip"])
                    optimizer.step()

                    cost = cost.item()
                    train_loss += cost
                    n_used_train_batch += 1
                    partial_num_files += consts["batch_size"]
                    if n_used_train_batch % 3000 == 0:
                        print("\tprocessed %s batches..." % n_used_train_batch)
                        #break
                    """
                    if partial_num_files // print_size == 1 and idx_batch < num_batches:
                        print (idx_batch + 1, "/" , num_batches, "batches have been processed,", \
                                "average cost until now:", "cost =", total_error / used_batch, ",", \
                                "cost_c =", error_c / used_batch, ",", \
                                "time:", time.time() - partial_start)
                        partial_num_files = 0
                        if not options["is_debugging"]:
                            print("save model... ",)
                            file_name =  model_name + ".gpu" + str(consts["idx_gpu"]) + ".epoch" + str(epoch // consts["save_epoch"] + existing_epoch) + "." + str(num_partial)
                            save_model(cfg.cc.MODEL_PATH + file_name, model, optimizer)
                            if options["fire"]:
                                shutil.move(cfg.cc.MODEL_PATH + file_name, "/out/")

                            print("finished")
                        num_partial += 1
                    """
                elapsed_time = time.time() - epoch_start

                model.eval()
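                # validation pass: no parameter updates; torch.no_grad() below
                # skips building the autograd graph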
                n_used_val_batch = 0
                val_loss = 0.0
                with torch.no_grad():
                    for idx_batch in range(n_val_batches):
                        val_idx = val_batch_list[idx_batch]
                        val_batch_raw = [
                            val_xy_list[xy_idx] for xy_idx in val_idx
                        ]
                        if len(val_batch_raw) != consts["batch_size"]:
                            continue
                        local_batch_size = len(val_batch_raw)
                        val_batch = datar.get_data(val_batch_raw, modules,
                                                   consts, options)
                        y_pred, cost = model(torch.LongTensor(val_batch.x).cuda(options["device"]), \
                                             torch.LongTensor(val_batch.y_inp).cuda(options["device"]), \
                                             torch.LongTensor(val_batch.y).cuda(options["device"]), \
                                             torch.FloatTensor(val_batch.x_mask).cuda(options["device"]), \
                                             torch.FloatTensor(val_batch.y_mask).cuda(options["device"]), \
                                             torch.LongTensor(val_batch.x_ext).cuda(options["device"]), \
                                             torch.LongTensor(val_batch.y_ext).cuda(options["device"]), \
                                             val_batch.max_ext_len)
                        n_used_val_batch += 1
                        val_loss += cost.item()
                val_loss /= float(n_used_val_batch)
                print("in this epoch, training loss =",
                      train_loss / n_used_train_batch, ", validation loss =",
                      val_loss, ", time:", elapsed_time)
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    filename = cfg.cc.MODEL_PATH + model_name + "_checkpoint_epoch%s.pkl" % (
                        epoch + 1)
                    print("Exceed! save the model to %s..." % filename)
                    save_model(filename, model, optimizer)
            print("finished")
        else:
            print("skip training model")

        if predict_model:
            predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
Example #9
def predict(model, modules, consts, options):
    print("start predicting,")
    model.eval()
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print("using beam search")
    else:
        print("using greedy search")
    rebuild_dir(cfg.cc.BEAM_SUMM_PATH)
    rebuild_dir(cfg.cc.BEAM_GT_PATH)
    rebuild_dir(cfg.cc.GROUND_TRUTH_PATH)
    rebuild_dir(cfg.cc.SUMM_PATH)

    print("loading test set...")
    xy_list = pickle.load(open(cfg.cc.TESTING_DATA_PATH + "test.pkl", "rb"))
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options,
                                                       consts)

    print("num_files = ", num_files, ", num_batches = ", num_batches)

    of = cfg.cc.RESULT_PATH + "out.txt"
    if os.path.exists(of):
        os.remove(of)

    pred_resp_file = cfg.cc.RESULT_PATH + 'pred_resp.txt'
    if os.path.exists(pred_resp_file):
        os.remove(pred_resp_file)

    gold_resp_file = cfg.cc.RESULT_PATH + 'gold_resp.txt'
    if os.path.exists(gold_resp_file):
        os.remove(gold_resp_file)

    running_start = time.time()
    partial_num = 0
    total_num = 0
    si = 0
    for idx_batch in range(num_batches):
        test_idx = batch_list[idx_batch]
        batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
        batch = datar.get_data(batch_raw, modules, consts, options)

        assert len(test_idx) == batch.x.shape[1]  # local_batch_size

        word_emb, padding_mask = model.encode(
            torch.LongTensor(batch.x).cuda(options["device"]))

        if options["beam_decoding"]:
            for idx_s in range(len(test_idx)):
                if options["copy"]:
                    inputx = (torch.LongTensor(batch.x_ext[:, idx_s]).cuda(options["device"]), \
                            torch.FloatTensor(batch.x_mask[:, idx_s, :]).cuda(options["device"]), \
                          word_emb[:, idx_s, :], padding_mask[:, idx_s],\
                          batch.y[:, idx_s], [batch.len_y[idx_s]], batch.original_summarys[idx_s],\
                          batch.max_ext_len, batch.x_ext_words[idx_s], batch.original_contents[idx_s])
                else:
                    inputx = (torch.LongTensor(batch.x[:, idx_s]).cuda(options["device"]), word_emb[:, idx_s, :], padding_mask[:, idx_s],\
                              batch.y[:, idx_s], [batch.len_y[idx_s]], batch.original_summarys[idx_s], batch.original_contents[idx_s])

                beam_decode(si, inputx, model, modules, consts, options)
                si += 1
        else:
            pass
            #greedy_decode()

        testing_batch_size = len(test_idx)
        partial_num += testing_batch_size
        total_num += testing_batch_size
        if partial_num >= consts["testing_print_size"]:
            print(total_num, "summs are generated")
            partial_num = 0
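    # external evaluation: distinct-n via distinct_topk.py, BLEU via multi-bleu.perl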
    pred_dist_file = cfg.cc.RESULT_PATH + 'pred_resp_dist.txt'
    print("Calculating distinct metrics...")
    os.system('python distinct_topk.py 1927 < %s > %s' %
              (pred_resp_file, pred_dist_file))
    os.system('perl multi-bleu.perl %s < %s' %
              (gold_resp_file, pred_resp_file))
    print(si, total_num)
Example #10
def run(existing_model_name=None):
    modules, consts, options = init_modules()

    #use_gpu(consts["idx_gpu"])
    if options["is_predicting"]:
        need_load_model = True
        training_model = False
        predict_model = True
    else:
        need_load_model = False
        training_model = True
        predict_model = False

    print_basic_info(modules, consts, options)

    if training_model:
        print("loading train set...")
        if options["is_debugging"]:
            xy_list = pickle.load(
                open(cfg.cc.VALIDATE_DATA_PATH + "valid.pkl", "rb"))
        else:
            xy_list = pickle.load(
                open(cfg.cc.TRAINING_DATA_PATH + "train.pkl", "rb"))
        batch_list, num_files, num_batches = datar.batched(
            len(xy_list), options, consts)
        print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()
    if True:  #TODO: refactor
        print("compiling model ...")
        model = Model(modules, consts, options)
        if options["cuda"]:
            model.cuda()
        optimizer = torch.optim.Adagrad(model.parameters(),
                                        lr=consts["lr"],
                                        initial_accumulator_value=0.1)

        model_name = "".join(["s2s.", options["cell"]])
        existing_epoch = 0
        if need_load_model:
            if existing_model_name is None:
                existing_model_name = "db.s2s.gru.gpu3.epoch15.5"
            print("loading existing model:", existing_model_name)
            model, optimizer = load_model(
                cfg.cc.MODEL_PATH + existing_model_name, model, optimizer)

        if training_model:
            print("start training model ")
            print_size = (num_files // consts["print_time"]
                          if num_files >= consts["print_time"] else num_files)

            last_total_error = float("inf")
            print("max epoch:", consts["max_epoch"])
            for epoch in range(0, consts["max_epoch"]):
                print("epoch: ", epoch + existing_epoch)
                num_partial = 1
                total_error = 0.0
                error_c = 0.0
                partial_num_files = 0
                epoch_start = time.time()
                partial_start = time.time()
                # shuffle the trainset
                batch_list, num_files, num_batches = datar.batched(
                    len(xy_list), options, consts)
                used_batch = 0.
                for idx_batch in range(num_batches):
                    train_idx = batch_list[idx_batch]
                    batch_raw = [xy_list[xy_idx] for xy_idx in train_idx]
                    if len(batch_raw) != consts["batch_size"]:
                        continue
                    local_batch_size = len(batch_raw)
                    batch = datar.get_data(batch_raw, modules, consts, options)

                    x, len_x, x_mask, y, len_y, y_mask, ox, oy, x_ext, y_ext, oovs = sort_samples(batch.x, batch.len_x, \
                                                             batch.x_mask, batch.y, batch.len_y, batch.y_mask, \
                                                             batch.original_contents, batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words)

                    model.zero_grad()
                    y_pred, cost, cost_c = model(torch.LongTensor(x).to(options["device"]), torch.LongTensor(len_x).to(options["device"]),\
                                   torch.LongTensor(y).to(options["device"]),  torch.FloatTensor(x_mask).to(options["device"]), \
                                   torch.FloatTensor(y_mask).to(options["device"]), torch.LongTensor(x_ext).to(options["device"]),\
                                   torch.LongTensor(y_ext).to(options["device"]), \
                                   batch.max_ext_len)
                    if cost_c is None:
                        loss = cost
                    else:
                        loss = cost + cost_c
                        cost_c = cost_c.item()
                        error_c += cost_c

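                    # backprop, clip the global gradient norm, then update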
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   consts["norm_clip"])
                    optimizer.step()

                    cost = cost.item()
                    total_error += cost
                    used_batch += 1
                    partial_num_files += consts["batch_size"]
                    if partial_num_files // print_size == 1 and idx_batch < num_batches:
                        print(idx_batch + 1, "/" , num_batches, "batches have been processed,", \
                             "average cost until now:", "cost =", total_error / used_batch, ",", \
                             "cost_c =", error_c / used_batch, ",",\
                             "time:", time.time() - partial_start)
                        partial_num_files = 0
                        if not options["is_debugging"]:
                            print("save model... ", )
                            save_model(
                                cfg.cc.MODEL_PATH + model_name + ".gpu" +
                                str(consts["idx_gpu"]) + ".epoch" +
                                str(epoch // consts["save_epoch"] +
                                    existing_epoch) + "." + str(num_partial),
                                model, optimizer)
                            print("finished")
                        num_partial += 1
                print("in this epoch, total average cost =", total_error / used_batch, ",", \
                     "cost_c =", error_c / used_batch, ",",\
                     "time:", time.time() - epoch_start)

                print_sent_dec(y_pred, y_ext, y_mask, oovs, modules, consts,
                               options, local_batch_size)

                if last_total_error > total_error or options["is_debugging"]:
                    last_total_error = total_error
                    if not options["is_debugging"]:
                        print("save model... ", )
                        save_model(
                            cfg.cc.MODEL_PATH + model_name + ".gpu" +
                            str(consts["idx_gpu"]) + ".epoch" +
                            str(epoch // consts["save_epoch"] + existing_epoch)
                            + "." + str(num_partial), model, optimizer)
                        print("finished")
                else:
                    print("optimization finished")
                    break

            print("save final model... ", )
            save_model(
                cfg.cc.MODEL_PATH + model_name + ".final.gpu" +
                str(consts["idx_gpu"]) + ".epoch" +
                str(epoch // consts["save_epoch"] + existing_epoch) + "." +
                str(num_partial), model, optimizer)
            print("finished")
        else:
            print("skip training model")

        if predict_model:
            predict(model, modules, consts, options)
    print("Finished, time:", time.time() - running_start)
Example #11
0
def predict(model, modules, consts, options):
    print("start predicting,")
    options["has_y"] = TESTING_DATASET_CLS.HAS_Y
    if options["beam_decoding"]:
        print("using beam search")
    else:
        print("using greedy search")
    rebuild_dir(cfg.cc.BEAM_SUMM_PATH)
    rebuild_dir(cfg.cc.BEAM_GT_PATH)
    rebuild_dir(cfg.cc.GROUND_TRUTH_PATH)
    rebuild_dir(cfg.cc.SUMM_PATH)

    of = cfg.cc.RESULT_PATH + "out.txt"
    if os.path.exists(of):
        os.remove(of)

    print("loading test set...")
    if options["model_selection"]:
        xy_list = pickle.load(
            open(cfg.cc.VALIDATE_DATA_PATH + "pj3000.pkl", "rb"))
    else:
        xy_list = pickle.load(
            open(cfg.cc.TESTING_DATA_PATH + "pj3000.pkl", "rb"))
    batch_list, num_files, num_batches = datar.batched(len(xy_list), options,
                                                       consts)

    print("num_files = ", num_files, ", num_batches = ", num_batches)

    running_start = time.time()
    partial_num = 0
    total_num = 0
    si = 0
    for idx_batch in range(num_batches):
        test_idx = batch_list[idx_batch]
        batch_raw = [xy_list[xy_idx] for xy_idx in test_idx]
        batch = datar.get_data(batch_raw, modules, consts, options)

        assert len(test_idx) == batch.x.shape[1]  # local_batch_size

        x, len_x, x_mask, y, len_y, y_mask, ox, oy, x_ext, y_ext, oovs = sort_samples(batch.x, batch.len_x, \
                                                             batch.x_mask, batch.y, batch.len_y, batch.y_mask, \
                                                             batch.original_contents, batch.original_summarys, batch.x_ext, batch.y_ext, batch.x_ext_words)

        word_emb, dec_state = model.encode(torch.LongTensor(x).to(options["device"]),\
                                           torch.LongTensor(len_x).to(options["device"]),\
                                           torch.FloatTensor(x_mask).to(options["device"]))

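        # beam search decodes one sample at a time; greedy decoding consumes
        # the whole batch in a single call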
        if options["beam_decoding"]:
            for idx_s in range(len(test_idx)):
                if options["copy"]:
                    inputx = (torch.LongTensor(x_ext[:, idx_s]).to(options["device"]), word_emb[:, idx_s, :], dec_state[idx_s, :],\
                          torch.FloatTensor(x_mask[:, idx_s, :]).to(options["device"]), y[:, idx_s], [len_y[idx_s]], ox[idx_s], oy[idx_s],\
                          batch.max_ext_len, oovs[idx_s])
                else:
                    inputx = (torch.LongTensor(x[:, idx_s]).to(options["device"]), word_emb[:, idx_s, :], dec_state[idx_s, :],\
                          torch.FloatTensor(x_mask[:, idx_s, :]).to(options["device"]), y[:, idx_s], [len_y[idx_s]], ox[idx_s], oy[idx_s])

                beam_decode(si, inputx, model, modules, consts, options)
                si += 1
        else:
            if options["copy"]:
                inputx = (torch.LongTensor(x_ext).to(options["device"]), word_emb, dec_state, \
                          torch.FloatTensor(x_mask).to(options["device"]), y, len_y, oy, batch.max_ext_len, oovs)
            else:
                inputx = (torch.LongTensor(x).to(options["device"]), word_emb,
                          dec_state, torch.FloatTensor(x_mask).to(
                              options["device"]), y, len_y, oy)
            greedy_decode(test_idx, inputx, model, modules, consts, options)
            si += len(test_idx)

        testing_batch_size = len(test_idx)
        partial_num += testing_batch_size
        total_num += testing_batch_size
        if partial_num >= consts["testing_print_size"]:
            print(total_num, "summs are generated")
            partial_num = 0
    print(si, total_num)
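
# hyper-parameters for the VAE example that follows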
lr = 0.001
drop_rate = 0.
batch_size = 128
hidden_size = 500
latent_size = 2
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum
optimizer = "adam"
continuous = False

if continuous:
    pass  # continuous-input branch not implemented here; data is only loaded in the else branch
else:
    train_set, valid_set, test_set = data.mnist()

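# batch the training set; data.batched appears to return {batch_id: (X, Y)},
# given how it is indexed and iterated below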
train_xy = data.batched(train_set, batch_size)
dim_x = train_xy[0][0].shape[1]
dim_y = train_xy[0][1].shape[1]
print "#features = ", dim_x, "#labels = ", dim_y

print "compiling..."
model = VAE(dim_x, dim_x, hidden_size, latent_size, optimizer)

print "training..."
start = time.time()
for i in xrange(50):
    error = 0.0
    in_start = time.time()
    for batch_id, xy in train_xy.items():
        X = xy[0]  # inputs only; the VAE reconstructs its input
        cost = model.train(X, lr)