Beispiel #1
0
def evaluate(batch_idx, epoch):
    """Decode the validation set with beam search and record BLEU scores.

    Switches the model to eval mode, translates every batch of
    ``valid_iter``, scores the best hypotheses with ``corpus_bleu`` and with
    ``bleu_script`` (which receives the path of a temporary file holding one
    hypothesis per line), prints both scores together with the decoding time
    and appends ``(bleu2, batch_idx, epoch)`` to ``opt.score_list``.

    Args:
        batch_idx: index of the training batch that triggered the
            evaluation; only used for reporting.
        epoch: current epoch; only used for reporting.
    """
    model.eval()
    hyp_list = []
    ref_list = []
    start_time = time.time()
    for batch in valid_iter:
        src_raw = batch[0]
        trg_raw = batch[1:]
        src, src_mask = convert_data(src_raw, src_vocab, device, True, UNK,
                                     PAD, SOS, EOS)
        with torch.no_grad():
            output = model.beamsearch(src,
                                      src_mask,
                                      opt.beam_size,
                                      normalize=True)
            best_hyp, _ = output[0]
            best_hyp = convert_str([best_hyp], trg_vocab)
            hyp_list.append(best_hyp[0])
            # Build a real list: on Python 3 map() returns a one-shot
            # iterator, and corpus_bleu walks each reference set several
            # times (once per n-gram order), so a lazy map would be
            # exhausted after the first pass.
            ref_list.append([x[0] for x in trg_raw])
    elapsed = time.time() - start_time
    bleu1 = corpus_bleu(ref_list,
                        hyp_list,
                        smoothing_function=SmoothingFunction().method1)
    hyp_lines = [' '.join(x) for x in hyp_list]
    # NamedTemporaryFile creates the file atomically (tempfile.mktemp only
    # returns a name and is open to races); delete=False keeps it on disk so
    # bleu_script can read it by path after the handle is closed.
    with tempfile.NamedTemporaryFile('w', delete=False) as f_tmp:
        f_tmp.write('\n'.join(hyp_lines))
    p_tmp = f_tmp.name
    bleu2 = bleu_script(p_tmp)
    print('BLEU score for {}-{} is {}/{}, {}'.format(epoch, batch_idx, bleu1,
                                                     bleu2, elapsed))
    opt.score_list.append((bleu2, batch_idx, epoch))
Beispiel #2
0
def decode(args):
    """Translate sentences read from stdin and write them to stdout.

    Loads the model named by ``args.model``, restores its variables inside a
    TensorFlow session and runs beam search on every input line. The best
    hypothesis (without its final token) is written to stdout, or an empty
    line when the search returns nothing. A running line count, the score
    and the search time are written to stderr.

    Args:
        args: parsed command-line options; reads ``model``, ``gpuid``,
            ``maxlen``, ``minlen``, ``beamsize`` and ``normalize``.
    """
    option, values = load_model(args.model)

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    if args.gpuid >= 0:
        session_config.gpu_options.visible_device_list = "%d" % args.gpuid

    (svocab, isvocab), (tvocab, itvocab) = option["vocabulary"]
    unk_sym = option["unk"]
    eos_sym = option["eos"]

    doption = {
        "maxlen": args.maxlen,
        "minlen": args.minlen,
        "beamsize": args.beamsize,
        "normalize": args.normalize,
    }

    count = 0

    with tf.Session(config=session_config):
        model = rnnsearch(**option)
        model.option = option

        tf.global_variables_initializer().run()
        set_variables(tf.trainable_variables(), values)

        # readline() returns "" only at end of input, which ends the loop.
        for line in iter(sys.stdin.readline, ""):
            seq, _ = convert_data([line], svocab, unk_sym, eos_sym)

            search_start = time.time()
            tlist = beamsearch(model, seq, **doption)
            search_time = time.time() - search_start

            if len(tlist) > 0:
                best, score = tlist[0]
                sys.stdout.write(" ".join(best[:-1]) + "\n")
            else:
                # No hypothesis: keep the output line-aligned with the input.
                sys.stdout.write("\n")
                score = -10000.0

            count += 1
            sys.stderr.write(str(count) + " ")
            sys.stderr.write(str(score) + " " + str(search_time) + "\n")
 def sample_fn(*args, **kwargs):
     """Print a random sentence pair from a batch with its beam-search output.

     ``args[0]`` is indexed as ``[sources, targets]``. One position is drawn
     at random, the sources are converted with ``convert_data`` and the
     selected row is decoded with ``beamsearch``. The source, the target and
     the best hypothesis (without its final token) are printed, each
     prefixed with ``"> "``. Keyword arguments are accepted but unused.
     """
     data = args[0]
     sources = data[0]
     pick = np.random.randint(0, len(sources))
     src_text = sources[pick]
     tgt_text = data[1][pick]

     encoded, _, lengths = convert_data(sources, svocab, unk, eos)
     # Slice rather than index so the single example stays a batch of one.
     one_x = encoded[pick:pick + 1, :]
     one_len = lengths[pick:pick + 1]

     hls = beamsearch(model, one_x, one_len, **search_opt)
     best, score = hls[0]
     print("> " + src_text)
     print("> " + tgt_text)
     print("> " + " ".join(best[:-1]))
Beispiel #4
0
def evaluate(batch_idx, epoch):
    """Decode the validation set and log multi-bleu plus 1- to 4-gram BLEU.

    Switches the model to eval mode, translates every batch of
    ``valid_iter`` with beam search, writes the hypotheses (one per line) to
    a temporary file for ``bleu_script``, computes smoothed n-gram BLEU with
    ``bleu`` for n = 1..4, sends all five scores to the summary ``writer``,
    logs them as an ASCII table and appends them, followed by ``batch_idx``
    and ``epoch``, to ``opt.score_list``.

    Args:
        batch_idx: index of the training batch that triggered the
            evaluation; used for logging and stored in the score tuple.
        epoch: current epoch; used as the summary step, for logging and
            stored in the score tuple.
    """
    model.eval()
    hyp_list = []
    ref_list = []
    for batch in valid_iter:
        src_raw = batch[0]
        trg_raw = batch[1:]
        src, src_mask = convert_data(
            src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
        )
        with torch.no_grad():
            output = model.beamsearch(src, src_mask, opt.beam_size, normalize=True)
            best_hyp, _ = output[0]
            best_hyp = convert_str([best_hyp], trg_vocab)
            hyp_list.append(best_hyp[0])
            ref_list.append([x[0] for x in trg_raw])

    hyp_list = [" ".join(x) for x in hyp_list]
    # NamedTemporaryFile creates the file atomically (tempfile.mktemp only
    # returns a name and is open to races); delete=False keeps it on disk so
    # bleu_script can read it by path after the handle is closed.
    with tempfile.NamedTemporaryFile("w", delete=False) as f_tmp:
        f_tmp.write("\n".join(hyp_list))
    p_tmp = f_tmp.name
    bleu2 = bleu_script(p_tmp)

    bleu_1_gram = bleu(hyp_list, ref_list, smoothing=True, n=1)
    bleu_2_gram = bleu(hyp_list, ref_list, smoothing=True, n=2)
    bleu_3_gram = bleu(hyp_list, ref_list, smoothing=True, n=3)
    bleu_4_gram = bleu(hyp_list, ref_list, smoothing=True, n=4)
    writer.add_scalar("./bleu_1_gram", bleu_1_gram, epoch)
    writer.add_scalar("./bleu_2_gram", bleu_2_gram, epoch)
    writer.add_scalar("./bleu_3_gram", bleu_3_gram, epoch)
    writer.add_scalar("./bleu_4_gram", bleu_4_gram, epoch)
    writer.add_scalar("./multi-bleu", bleu2, epoch)
    bleu_result = [
        ["multi-bleu", "bleu_1-gram", "bleu_2-gram", "bleu_3-gram", "bleu_4-gram"],
        [bleu2, bleu_1_gram, bleu_2_gram, bleu_3_gram, bleu_4_gram],
    ]
    bleu_table = AsciiTable(bleu_result)
    logger.info(
        "BLEU score for Epoch-{}-batch-{}: ".format(epoch, batch_idx)
        + "\n"
        + bleu_table.table
    )
    opt.score_list.append(
        (bleu2, bleu_1_gram, bleu_2_gram, bleu_3_gram, bleu_4_gram, batch_idx, epoch)
    )
Beispiel #5
0
def translate(model, corpus, **opt):
    """Translate every line of a corpus file with beam search.

    Args:
        model: model handed to ``beamsearch``; its ``option`` dict supplies
            the source vocabulary and the unk/eos symbols.
        corpus: path of a text file with one source sentence per line.
        **opt: keyword arguments forwarded unchanged to ``beamsearch``.

    Returns:
        A list with one entry per input line: the best hypothesis without
        its final token, or an empty list when the search returned no
        hypothesis.
    """
    svocab = model.option["vocabulary"][0][0]
    unk_symbol = model.option["unk"]
    eos_symbol = model.option["eos"]

    trans = []

    # The context manager closes the corpus even if conversion or search
    # raises part-way through the file.
    with open(corpus, "r") as fd:
        for line in fd:
            data, _ = convert_data([line.strip()], svocab, unk_symbol,
                                   eos_symbol)
            hls = beamsearch(model, data, **opt)
            if len(hls) > 0:
                best, _ = hls[0]
                trans.append(best[:-1])
            else:
                trans.append([])

    return trans
Beispiel #6
0
        sys.stderr.write(err)
        sys.exit(1)
    bleu = float(out)
    return bleu


# Decode the test set with beam search, score it with corpus_bleu and dump the
# hypotheses (one per line) to a temporary file whose path is left in p_tmp.
hyp_list = []
ref_list = []
start_time = time.time()
for ix, batch in enumerate(test_iter, start=1):
    src_raw = batch[0]
    trg_raw = batch[1:]
    src, src_mask = convert_data(src_raw, src_vocab, device, True, UNK, PAD,
                                 SOS, EOS)
    with torch.no_grad():
        output = model.beamsearch(src, src_mask, opt.beam_size, normalize=True)
        best_hyp, best_score = output[0]
        best_hyp = convert_str([best_hyp], trg_vocab)
        hyp_list.append(best_hyp[0])
        # Build a real list: on Python 3 map() returns a one-shot iterator,
        # and corpus_bleu walks each reference set several times.
        ref = [x[0] for x in trg_raw]
        ref_list.append(ref)
    # Progress: batches done, total batches, percentage.
    print(ix, len(test_iter), 100. * ix / len(test_iter))
elapsed = time.time() - start_time
bleu1 = corpus_bleu(ref_list,
                    hyp_list,
                    smoothing_function=SmoothingFunction().method1)
hyp_list = [' '.join(x) for x in hyp_list]
# NamedTemporaryFile creates the file atomically (tempfile.mktemp only returns
# a name and is open to races); delete=False keeps it on disk after the
# handle is closed so it can still be read by path.
with tempfile.NamedTemporaryFile('w', delete=False) as f_tmp:
    f_tmp.write('\n'.join(hyp_list))
p_tmp = f_tmp.name
Beispiel #7
0
def train(args):
    option = default_option()

    # predefined model names
    pathname, basename = os.path.split(args.model)
    modelname = get_filename(basename)
    autoname = os.path.join(pathname, modelname + ".autosave.pkl")
    bestname = os.path.join(pathname, modelname + ".best.pkl")

    # load models
    if os.path.exists(args.model):
        option, params = load_model(args.model)
        init = False
    else:
        init = True

    if args.initialize:
        init_params = load_model(args.initialize)
        init_params = init_params[1]
        restore = True
    else:
        restore = False

    override(option, args)
    print_option(option)

    # load references
    if option["references"]:
        references = load_references(option["references"])
    else:
        references = None

    # input corpus
    batch = option["batch"]
    sortk = option["sort"] or 1
    shuffle = option["seed"] if option["shuffle"] else None
    reader = textreader(option["corpus"], shuffle)
    processor = [data_length, data_length]
    stream = textiterator(reader, [batch, batch * sortk], processor,
                          option["limit"], option["sort"])

    if shuffle and option["indices"] is not None:
        reader.set_indices(option["indices"])

    if args.reset:
        option["count"] = [0, 0]
        option["epoch"] = 0
        option["cost"] = 0.0

    skip_stream(reader, option["count"][1])
    epoch = option["epoch"]
    maxepoch = option["maxepoch"]

    # create session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    if args.gpuid >= 0:
        config.gpu_options.visible_device_list = "%d" % args.gpuid

    with tf.Session(config=config):
        # set seed
        np.random.seed(option["seed"])
        tf.set_random_seed(option["seed"])

        # create model
        initializer = tf.random_uniform_initializer(-0.08, 0.08)
        model = rnnsearch(initializer=initializer, **option)

        print "parameters:", count_parameters(tf.trainable_variables())

        variables = None

        if restore:
            matched, not_matched = match_variables(tf.trainable_variables(),
                                                   init_params)
            if args.finetune:
                variables = not_matched

        # create optimizer
        constraint = ["norm", option["norm"]]
        optim = optimizer(model, algorithm=option["optimizer"], norm=True,
                          constraint=constraint, variables=variables)

        tf.global_variables_initializer().run()

        if not init:
            set_variables(tf.trainable_variables(), params)

        if restore:
            restore_variables(matched, not_matched)

        # beamsearch option
        search_opt = {}
        search_opt["beamsize"] = option["beamsize"]
        search_opt["normalize"] = option["normalize"]
        search_opt["maxlen"] = option["maxlen"]
        search_opt["minlen"] = option["minlen"]

        # vocabulary and special symbol
        svocabs, tvocabs = option["vocabulary"]
        svocab, isvocab = svocabs
        tvocab, itvocab = tvocabs
        unk_sym = option["unk"]
        eos_sym = option["eos"]

        # summary
        count = option["count"][0]
        totcost = option["cost"]
        best_score = option["bleu"]
        alpha = option["alpha"]

        for i in range(epoch, maxepoch):
            for data in stream:
                xdata, xlen = convert_data(data[0], svocab, unk_sym, eos_sym)
                ydata, ylen = convert_data(data[1], tvocab, unk_sym, eos_sym)

                t1 = time.time()
                cost, norm = optim.optimize(xdata, xlen, ydata, ylen)
                optim.update(alpha=alpha)
                t2 = time.time()

                count += 1
                cost = cost * len(ylen) / sum(ylen)
                totcost += cost / math.log(2)

                print i + 1, count, cost, norm, t2 - t1

                # save model
                if count % option["freq"] == 0:
                    option["indices"] = reader.get_indices()
                    option["bleu"] = best_score
                    option["cost"] = totcost
                    option["count"] = [count, reader.count]
                    serialize(autoname, option)

                if count % option["vfreq"] == 0:
                    if option["validation"] and references:
                        trans = translate(model, option["validation"],
                                          **search_opt)
                        bleu_score = bleu(trans, references)
                        print "bleu: %2.4f" % bleu_score
                        if bleu_score > best_score:
                            best_score = bleu_score
                            option["indices"] = reader.get_indices()
                            option["bleu"] = best_score
                            option["cost"] = totcost
                            option["count"] = [count, reader.count]
                            serialize(bestname, option)

                if count % option["sfreq"] == 0:
                    batch = len(data[0])
                    ind = np.random.randint(0, batch)
                    sdata = data[0][ind]
                    tdata = data[1][ind]
                    xdata = xdata[:, ind : ind + 1]
                    xlen = xlen[ind : ind + 1]
                    hls = beamsearch(model, xdata, xlen, **search_opt)
                    best, score = hls[0]
                    print sdata
                    print tdata
                    print "search score:", score
                    print "translation:", " ".join(best[:-1])

            print "--------------------------------------------------"

            if option["vfreq"] and references:
                trans = translate(model, option["validation"], **search_opt)
                bleu_score = bleu(trans, references)
                print "iter: %d, bleu: %2.4f" % (i + 1, bleu_score)
                if bleu_score > best_score:
                    best_score = bleu_score
                    option["indices"] = reader.get_indices()
                    option["bleu"] = best_score
                    option["cost"] = totcost
                    option["count"] = [count, reader.count]
                    serialize(bestname, option)

            print "averaged cost: ", totcost / option["count"][0]
            print "--------------------------------------------------"

            # early stopping
            if i >= option["stop"]:
                alpha = alpha * option["decay"]

            count = 0
            totcost = 0.0
            stream.reset()

            # update autosave
            option["epoch"] = i + 1
            option["alpha"] = alpha
            option["indices"] = reader.get_indices()
            option["bleu"] = best_score
            option["cost"] = totcost
            option["count"] = [0, 0]
            serialize(autoname, option)

        print "best(bleu): %2.4f" % best_score

    stream.close()
Beispiel #8
0
def train(epoch):
    """Run one training epoch with periodic validation, sampling and saving.

    For every batch of ``train_iter`` the source and the two target
    encodings are built with ``convert_data``, the model is run (through
    ``nn.parallel.data_parallel`` when several GPUs are available and
    ``opt.local_rank`` is None), the loss is written to the summary
    ``writer``, back-propagated with gradient clipping and the optimizer is
    stepped. Additionally:

    * every ``opt.vfreq`` batches ``evaluate`` is called and the "best" and
      "epoch-best" checkpoints are replaced when the newest score in
      ``opt.score_list`` improves on the previous ones,
    * every ``opt.sfreq`` batches one random training pair is decoded and
      logged as a table,
    * every ``opt.freq`` batches the "tmp" checkpoint is replaced.

    Args:
        epoch: index of the current epoch; used for logging, for the summary
            step and for checkpoint names.
    """
    model.train()
    opt.epoch_best_score = -float("inf")
    opt.epoch_best_name = None
    for batch_idx, batch in enumerate(train_iter, start=1):
        batch = sort_batch(batch)
        src_raw = batch[0]
        trg_raw = batch[1]
        src, src_mask = convert_data(
            src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
        )
        f_trg, f_trg_mask = convert_data(
            trg_raw, trg_vocab, device, False, UNK, PAD, SOS, EOS
        )
        b_trg, b_trg_mask = convert_data(
            trg_raw, trg_vocab, device, True, UNK, PAD, SOS, EOS
        )
        optimizer.zero_grad()
        if opt.cuda and torch.cuda.device_count() > 1 and opt.local_rank is None:
            loss, w_loss = nn.parallel.data_parallel(
                model, (src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask), device_ids
            )
        else:
            loss, w_loss = model(src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask)
        # Reduce before reading the scalar: the data-parallel path can return
        # one loss per device, and Tensor.item() only accepts a single
        # element.
        mean_loss = loss.mean()
        loss_value = mean_loss.item()
        # Summary step is the position of this batch across epochs. The
        # original read `current_batches`, which this function never defines
        # or updates.
        global_batches = len(train_iter) * epoch + batch_idx
        writer.add_scalar(
            "./loss", scalar_value=loss_value, global_step=global_batches,
        )
        mean_loss.backward()
        torch.nn.utils.clip_grad_norm_(param_list, opt.grad_clip)
        optimizer.step()
        # Log the first batch, every tenth batch and the last batch.
        # Enumeration starts at 1, so the original `batch_idx == 0` test
        # could never fire.
        if batch_idx % 10 == 0 or batch_idx == len(train_iter) or batch_idx == 1:
            logger.info(
                str(
                    "Epoch: {} batch: {}/{}({:.3%}), loss: {:.6}, lr: {}".format(
                        epoch,
                        batch_idx,
                        len(train_iter),
                        batch_idx / len(train_iter),
                        loss_value,
                        opt.cur_lr,
                    )
                )
            )

        # validation
        if batch_idx % opt.vfreq == 0:
            logger.info(str("===========validation / test START==========="))
            evaluate(batch_idx, epoch)
            # evaluate() leaves the model in eval mode
            model.train()
            if opt.decay_lr:
                adjust_learningrate(opt.score_list)
            if len(opt.score_list) == 1 or opt.score_list[-1][0] > max(
                [x[0] for x in opt.score_list[:-1]]
            ):
                if opt.best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.best_name))
                opt.best_name = save_model(model, batch_idx, epoch, "best")
            if opt.epoch_best and opt.score_list[-1][0] > opt.epoch_best_score:
                opt.epoch_best_score = opt.score_list[-1][0]
                if opt.epoch_best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.epoch_best_name))
                opt.epoch_best_name = save_model(model, batch_idx, epoch, "epoch-best")
            logger.info("===========validation / test DONE===========")

        # sampling
        if batch_idx % opt.sfreq == 0:
            length = len(src_raw)
            ix = np.random.randint(0, length)
            samp_src_raw = [src_raw[ix]]
            samp_trg_raw = [trg_raw[ix]]
            samp_src, samp_src_mask = convert_data(
                samp_src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
            )
            model.eval()
            with torch.no_grad():
                output = model.beamsearch(samp_src, samp_src_mask, opt.beam_size)
            best_hyp, best_score = output[0]
            best_hyp = convert_str([best_hyp], trg_vocab)
            sampling_result = []
            sampling_result.append(["Key", "Value"])
            sampling_result.append(["Source", str(" ".join(samp_src_raw[0]))])
            sampling_result.append(["Target", str(" ".join(samp_trg_raw[0]))])
            sampling_result.append(["Predict", str(" ".join(best_hyp[0]))])
            sampling_result.append(["Best Score", str(round(best_score, 5))])
            sampling_table = AsciiTable(sampling_result)
            logger.info("===========sampling START===========")
            logger.info("\n" + str(sampling_table.table))
            logger.info("===========sampling DONE===========")
            model.train()

        # saving model
        if opt.freq and batch_idx % opt.freq == 0:
            if opt.tmp_name is not None:
                os.remove(os.path.join(opt.checkpoint, opt.tmp_name))
            opt.tmp_name = save_model(model, batch_idx, epoch, "tmp")
def decode(args):
    """Translate a corpus file line by line into a translation file.

    Loads the model named by ``args.model``, builds the ``NMT`` graph,
    restores its variables inside a TensorFlow session and runs beam search
    on every line of ``args.corpus``. For each input line exactly one line
    is written to ``args.translation``: the best hypothesis without its
    final token, or an empty line when the search returns nothing. A running
    line count, the score and the search time are written to stderr.

    Args:
        args: parsed command-line options; reads ``model``, ``corpus``,
            ``translation``, ``maxlen``, ``minlen``, ``beamsize`` and
            ``normalize``.
    """
    option, values = load_model(args.model)
    #option, values = load_average_model(args.model)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    svocabs, tvocabs = option["vocabulary"]
    svocab, isvocab = svocabs
    tvocab, itvocab = tvocabs

    unk_sym = option["unk"]
    eos_sym = option["eos"]

    source_word2vec, target_word2vec = option["word2vecs"]

    count = 0

    doption = {
        "maxlen": args.maxlen,
        "minlen": args.minlen,
        "beamsize": args.beamsize,
        "normalize": args.normalize
    }

    # create graph
    model = NMT(option["num_layers"], option["num_heads"],
                option["attention_dropout"], option["residual_dropout"],
                option["relu_dropout"],
                option["embedding"], option["hidden"], option["filter"],
                len(isvocab), len(itvocab), source_word2vec, target_word2vec)

    model.option = option

    # Context managers close both files even if decoding fails part-way.
    with open(args.corpus, 'r') as input_file, \
            open(args.translation, 'w') as output_file, \
            tf.Session(config=config):
        tf.global_variables_initializer().run()
        set_variables(tf.trainable_variables(), values)

        for line in input_file:
            data = [line]
            seq, _, seq_len = convert_data(data, svocab, unk_sym, eos_sym)
            t1 = time.time()
            tlist = beamsearch(model, seq, seq_len, **doption)
            t2 = time.time()

            if len(tlist) == 0:
                # The placeholder line belongs in the translation file, not
                # on stdout; otherwise the output falls out of step with the
                # input whenever the search finds nothing.
                output_file.write("\n")
                score = -10000.0
            else:
                best, score = tlist[0]
                output_file.write(" ".join(best[:-1]))
                output_file.write("\n")

            count = count + 1
            sys.stderr.write(str(count) + " ")
            sys.stderr.write(str(score) + " " + str(t2 - t1) + "\n")
Beispiel #10
0
def train(epoch):
    """Run one training epoch with periodic validation, sampling and saving.

    Each batch of ``train_iter`` is sorted, converted with ``convert_data``
    and passed through the model (via ``nn.parallel.data_parallel`` when
    several GPUs are available and ``opt.local_rank`` is None). The mean of
    the first model output is back-propagated, gradients are clipped and the
    optimizer is stepped. A progress line is printed for every batch. Every
    ``opt.vfreq`` batches ``evaluate`` runs and the "best" and "epoch-best"
    checkpoints are refreshed; every ``opt.sfreq`` batches a random training
    pair is decoded and printed; every ``opt.freq`` batches the "tmp"
    checkpoint is replaced.

    Args:
        epoch: index of the current epoch; used for printing and for
            checkpoint names.
    """
    model.train()
    opt.epoch_best_score = -float('inf')
    opt.epoch_best_name = None

    for batch_idx, batch in enumerate(train_iter, start=1):
        tick = time.time()
        batch = sort_batch(batch)
        src_raw, trg_raw = batch[0], batch[1]

        src, src_mask = convert_data(
            src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS)
        f_trg, f_trg_mask = convert_data(
            trg_raw, trg_vocab, device, False, UNK, PAD, SOS, EOS)
        b_trg, b_trg_mask = convert_data(
            trg_raw, trg_vocab, device, True, UNK, PAD, SOS, EOS)

        optimizer.zero_grad()
        model_inputs = (src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask)
        multi_gpu = (opt.cuda and torch.cuda.device_count() > 1
                     and opt.local_rank is None)
        if multi_gpu:
            outputs = nn.parallel.data_parallel(model, model_inputs,
                                                device_ids)
        else:
            outputs = model(*model_inputs)

        # The first output is the training loss; the others are only printed.
        outputs[0].mean().backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(param_list, opt.grad_clip)
        optimizer.step()
        elapsed = time.time() - tick

        stats = ' '.join(str(out.mean().item()) for out in outputs)
        percent = 100. * batch_idx / len(train_iter)
        print(epoch, batch_idx, len(train_iter), percent, stats,
              grad_norm.item(), opt.cur_lr, elapsed)

        # validation
        if batch_idx % opt.vfreq == 0:
            evaluate(batch_idx, epoch)
            # evaluate() leaves the model in eval mode
            model.train()
            if opt.decay_lr:
                adjust_learningrate(opt.score_list)

            latest = opt.score_list[-1][0]
            earlier = opt.score_list[:-1]
            if not earlier or latest > max(s[0] for s in earlier):
                if opt.best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.best_name))
                opt.best_name = save_model(model, batch_idx, epoch, 'best')

            if opt.epoch_best and latest > opt.epoch_best_score:
                opt.epoch_best_score = latest
                if opt.epoch_best_name is not None:
                    stale = os.path.join(opt.checkpoint, opt.epoch_best_name)
                    os.remove(stale)
                opt.epoch_best_name = save_model(model, batch_idx, epoch,
                                                 'epoch-best')

        # sampling
        if batch_idx % opt.sfreq == 0:
            pick = np.random.randint(0, len(src_raw))
            samp_src_raw = [src_raw[pick]]
            samp_trg_raw = [trg_raw[pick]]
            samp_src, samp_src_mask = convert_data(
                samp_src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS)
            model.eval()
            with torch.no_grad():
                output = model.beamsearch(samp_src, samp_src_mask,
                                          opt.beam_size)
            best_hyp, best_score = output[0]
            best_hyp = convert_str([best_hyp], trg_vocab)
            print('--', ' '.join(samp_src_raw[0]))
            print('--', ' '.join(samp_trg_raw[0]))
            print('--', ' '.join(best_hyp[0]))
            print('--', best_score)
            model.train()

        # saving model
        if opt.freq and batch_idx % opt.freq == 0:
            if opt.tmp_name is not None:
                os.remove(os.path.join(opt.checkpoint, opt.tmp_name))
            opt.tmp_name = save_model(model, batch_idx, epoch, 'tmp')