def main():
    """Translate a source file with a saved Transformer checkpoint.

    Reads the command-line options via ``get_translate_args``, loads the
    pickled id-to-word vocabulary found under ``args.input``, encodes
    ``args.src`` with it, restores the model stored in
    ``args.best_model_file``, decodes with ``TranslateText`` and hands the
    hypotheses to ``save_output`` together with ``args.output``.
    """
    args = get_translate_args()
    print(json.dumps(args.__dict__, indent=4))

    # Reading the vocab file.
    # NOTE: pickle.load (and torch.load below) unpickle arbitrary objects;
    # only point this script at files you produced yourself.
    vocab_path = os.path.join(args.input, args.data + '.vocab.pickle')
    with open(vocab_path, 'rb') as f:
        id2w = pickle.load(f)

    w2id = {w: i for i, w in id2w.items()}
    source_data = preprocess.make_dataset(os.path.realpath(args.src),
                                          w2id,
                                          args.tok)

    # Load onto the CPU first so that a checkpoint written on a GPU machine
    # can still be restored when running with --gpu < 0; the model is moved
    # to the requested device further down.
    checkpoint = torch.load(args.best_model_file, map_location='cpu')
    # Report the file that was actually loaded.
    print("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))

    config = checkpoint['opts']
    model = net.Transformer(config)
    model.load_state_dict(checkpoint['state_dict'])

    if args.gpu >= 0:
        model.cuda(args.gpu)
    print(model)

    # Decode with a quarter of the training batch size, but never with 0.
    hyp = TranslateText(model,
                        source_data,
                        batch=max(1, args.batchsize // 4),
                        beam_size=args.beam_size,
                        alpha=args.alpha)()
    save_output(hyp, id2w, args.output)
def gen_data():
    """Build the test-time ``DataGenerator`` for the DoS variation capture.

    Points ``Config.DATAPATH`` at ``data/test/``, regenerates the dataset
    from ``DoS_variation.csv`` and returns a generator over the trailing
    part of the directory listing (everything from the 85% mark onwards).
    """
    # Read the sample shape before anything else touches Config.
    sample_shape = (Config.NUM_ID, 2 * Config.NUM_INTVL)

    Config.DATAPATH = 'data/test/'
    make_dataset("DoS_variation.csv")

    file_names = os.listdir(Config.DATAPATH)
    file_names.remove('labels.npy')  # the label file is not a sample
    first_kept = int(len(file_names) / 10 * 8.5)
    tail_files = file_names[first_kept:]

    labels = np.load(Config.DATAPATH + "labels.npy")
    return DataGenerator(
        tail_files,
        labels,
        dim=sample_shape,
        batch_size=64,
        n_classes=2,
        n_channels=1,
        shuffle=True,
    )
def __init__(self, fpath, IsTest=False):
    """Load the articles under ``fpath`` and store them sentence by sentence.

    Populates ``self.sents`` (token lists wrapped in ``[CLS]``/``[SEP]``),
    ``self.ids`` (one single-element list per sentence) and
    ``self.tags_li`` (one list of ``<PAD>``-wrapped label sequences per
    task).

    Args:
        fpath: Location passed to ``make_dataset`` as a ``pathlib.Path``.
        IsTest: Use ``make_bert_testset`` instead of ``make_bert_dataset``.
    """
    dataset = make_dataset(pathlib.Path(fpath))
    build = make_bert_testset if IsTest else make_bert_dataset
    article_words, article_tags, article_ids = build(dataset)

    # Drop the per-article grouping: one (tokens, labels, id) per sentence.
    sentences = [
        (tokens, labels, sent_id)
        for art_w, art_t, art_i in zip(article_words,
                                       article_tags,
                                       article_ids)
        for tokens, labels, sent_id in zip(art_w, art_t, art_i)
    ]

    sents, sent_ids = [], []
    tags_li = [[] for _ in range(num_task)]
    for tokens, labels, sent_id in sentences:
        sent_ids.append([sent_id])
        sents.append(["[CLS]"] + tokens + ["[SEP]"])

        if num_task != 2:
            # One token-level sequence per task, keeping only the labels
            # that belong to that task's vocabulary.
            for task in range(num_task):
                task_labels = [
                    label if label != 'O' and label in VOCAB[task] else 'O'
                    for label in labels
                ]
                tags_li[task].append(["<PAD>"] + task_labels + ["<PAD>"])
        else:
            # Task 0: token-level labels. Task 1: a single sentence-level
            # label that is 'Prop' as soon as any token label survived.
            token_labels = [
                label if label != 'O' and label in VOCAB[0] else 'O'
                for label in labels
            ]
            if any(label != 'O' for label in token_labels):
                sentence_label = ['Prop']
            else:
                sentence_label = ['Non-prop']
            per_task = [token_labels, sentence_label]
            for task in range(num_task):
                tags_li[task].append(["<PAD>"] + per_task[task] + ["<PAD>"])

    self.sents, self.ids, self.tags_li = sents, sent_ids, tags_li
    assert len(sents) == len(sent_ids) == len(tags_li[0])
def __init__(self, directory, is_test=False, verbose=False):
    """Load ``directory`` and store its sentences with per-task labels.

    ``make_bert_dataset`` is unpacked into three article-grouped
    collections: token lists per sentence, label lists per sentence, and
    sentence ids. They are flattened into ``self.sents`` (wrapped in
    ``[CLS]``/``[SEP]``), ``self.ids`` and ``self.tags_li`` (one list of
    ``<PAD>``-wrapped label sequences per task).

    Args:
        directory: Passed straight to ``make_dataset``.
        is_test: Forwarded to ``make_bert_dataset``.
        verbose: Forwarded to ``make_bert_dataset``.
    """
    dataset = make_dataset(directory)
    words, tags, ids = make_bert_dataset(dataset,
                                         is_test=is_test,
                                         verbose=verbose)

    sents, sent_ids = [], []
    tags_li = [[] for _ in range(NUM_TASK)]

    for art_words, art_tags, art_ids in zip(words, tags, ids):
        for tokens, labels, sent_id in zip(art_words, art_tags, art_ids):
            sents.append(["[CLS]"] + tokens + ["[SEP]"])
            sent_ids.append(sent_id)

            # Token-level labels restricted to the technique vocabulary.
            token_labels = [
                label if label != 'O' and label in VOCAB[0] else 'O'
                for label in labels
            ]

            if NUM_TASK == 1:
                # technique classification only
                tags_li[0].append(["<PAD>"] + token_labels + ["<PAD>"])
                continue

            # sentence classification: 'Prop' once any token label survived
            if any(label != 'O' for label in token_labels):
                sentence_label = ['Prop']
            else:
                sentence_label = ['Non-prop']
            per_task = [token_labels, sentence_label]
            for task in range(NUM_TASK):
                tags_li[task].append(["<PAD>"] + per_task[task] + ["<PAD>"])

    self.sents, self.ids, self.tags_li = sents, sent_ids, tags_li
    assert len(self.sents) == len(self.ids) == len(self.tags_li[0])
def main():
    """Train a Transformer translation model with Chainer's ``Trainer``.

    Parses the command line, builds vocabularies and id-encoded training /
    validation pairs with ``preprocess``, creates ``net.Transformer``, and
    runs a trainer with these extensions: validation, optional
    ``VaswaniRule`` learning-rate scheduling, best-model snapshotting on
    ``val/main/perp``, sample translations, optional BLEU, and logging.
    """
    parser = argparse.ArgumentParser(
        description='Chainer example: convolutional seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=48,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', type=int, default=512,
                        help='Number of units')
    parser.add_argument('--layer', '-l', type=int, default=6,
                        help='Number of layers')
    parser.add_argument('--head', type=int, default=8,
                        help='Number of heads in attention mechanism')
    parser.add_argument('--dropout', '-d', type=float, default=0.1,
                        help='Dropout rate')
    parser.add_argument('--input', '-i', type=str, default='./',
                        help='Input directory')
    parser.add_argument('--source', '-s', type=str,
                        default='europarl-v7.fr-en.en',
                        help='Filename of train data for source language')
    parser.add_argument('--target', '-t', type=str,
                        default='europarl-v7.fr-en.fr',
                        help='Filename of train data for target language')
    parser.add_argument('--source-valid', '-svalid', type=str,
                        default='dev/newstest2013.en',
                        help='Filename of validation data for source language')
    parser.add_argument('--target-valid', '-tvalid', type=str,
                        default='dev/newstest2013.fr',
                        help='Filename of validation data for target language')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--source-vocab', type=int, default=40000,
                        help='Vocabulary size of source language')
    parser.add_argument('--target-vocab', type=int, default=40000,
                        help='Vocabulary size of target language')
    parser.add_argument('--no-bleu', '-no-bleu', action='store_true',
                        help='Skip BLEU calculation')
    parser.add_argument('--use-label-smoothing', action='store_true',
                        help='Use label smoothing for cross entropy')
    parser.add_argument('--embed-position', action='store_true',
                        help='Use position embedding rather than sinusoid')
    parser.add_argument('--use-fixed-lr', action='store_true',
                        help='Use fixed learning rate rather than the ' +
                             'annealing proposed in the paper')
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=4))

    # Check file
    en_path = os.path.join(args.input, args.source)
    source_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(en_path, args.source_vocab)
    source_data = preprocess.make_dataset(en_path, source_vocab)
    fr_path = os.path.join(args.input, args.target)
    target_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(fr_path, args.target_vocab)
    target_data = preprocess.make_dataset(fr_path, target_vocab)
    assert len(source_data) == len(target_data)
    print('Original training data size: %d' % len(source_data))
    # Keep only pairs where both sides have between 1 and 49 tokens.
    train_data = [(s, t)
                  for s, t in six.moves.zip(source_data, target_data)
                  if 0 < len(s) < 50 and 0 < len(t) < 50]
    print('Filtered training data size: %d' % len(train_data))

    en_path = os.path.join(args.input, args.source_valid)
    source_data = preprocess.make_dataset(en_path, source_vocab)
    fr_path = os.path.join(args.input, args.target_valid)
    target_data = preprocess.make_dataset(fr_path, target_vocab)
    assert len(source_data) == len(target_data)
    test_data = [(s, t)
                 for s, t in six.moves.zip(source_data, target_data)
                 if 0 < len(s) and 0 < len(t)]

    source_ids = {word: index for index, word in enumerate(source_vocab)}
    target_ids = {word: index for index, word in enumerate(target_vocab)}

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    # Define Model
    model = net.Transformer(
        args.layer,
        min(len(source_ids), len(source_words)),
        min(len(target_ids), len(target_words)),
        args.unit,
        h=args.head,
        dropout=args.dropout,
        max_length=500,
        use_label_smoothing=args.use_label_smoothing,
        embed_position=args.embed_position)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    # Setup Optimizer
    optimizer = chainer.optimizers.Adam(
        alpha=5e-5,
        beta1=0.9,
        beta2=0.98,
        eps=1e-9)
    optimizer.setup(model)

    # Setup Trainer
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_data, args.batchsize,
                                                 repeat=False, shuffle=False)
    iter_per_epoch = len(train_data) // args.batchsize
    print('Number of iter/epoch =', iter_per_epoch)

    updater = training.StandardUpdater(
        train_iter, optimizer,
        converter=seq2seq_pad_concat_convert, device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Half an epoch, but at least one iteration. With fewer than two
    # iterations per epoch the unclamped value is 0, which made
    # floor_step() below divide by zero and produced zero-period triggers.
    half_epoch = max(1, iter_per_epoch // 2)

    # If you want to change a logging interval, change this number
    log_trigger = (min(200, half_epoch), 'iteration')

    def floor_step(trigger):
        """Round ``trigger`` down to a multiple of the logging interval."""
        floored = trigger[0] - trigger[0] % log_trigger[0]
        if floored <= 0:
            floored = trigger[0]
        return (floored, trigger[1])

    # Validation every half epoch
    eval_trigger = floor_step((half_epoch, 'iteration'))
    record_trigger = training.triggers.MinValueTrigger(
        'val/main/perp', eval_trigger)

    evaluator = extensions.Evaluator(
        test_iter, model,
        converter=seq2seq_pad_concat_convert, device=args.gpu)
    evaluator.default_name = 'val'
    trainer.extend(evaluator, trigger=eval_trigger)

    # Use Vaswani's learning-rate rule (Eq. 3 in the paper).
    # The hyperparameters in the paper seem to work well only with a large
    # batch size. On a common setup (e.g. size=48 on 1 GPU) you may have to
    # change them ("scale" is always multiplied into the learning rate).
    # With a shallow network (<= 2 layers) the paper setting may be fine.
    # Previously tried alternatives: warmup_steps=4000/16000/32000 with
    # scale=1., and warmup_steps=4000 with scale=0.5.
    if not args.use_fixed_lr:
        trainer.extend(
            VaswaniRule('alpha', d=args.unit, warmup_steps=64000, scale=1.),
            trigger=(1, 'iteration'))
    observe_alpha = extensions.observe_value(
        'alpha',
        lambda trainer: trainer.updater.get_optimizer('main').alpha)
    trainer.extend(observe_alpha, trigger=(1, 'iteration'))

    # Only if a model gets best validation score,
    # save (overwrite) the model
    trainer.extend(extensions.snapshot_object(
        model, 'best_model.npz'),
        trigger=record_trigger)

    def translate_one(source, target):
        """Print the beam-5 translation of ``source`` next to ``target``."""
        words = preprocess.split_sentence(source)
        print('# source : ' + ' '.join(words))
        # Words outside the vocabulary map to id 1 ('<unk>').
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], 'i')
        ys = model.translate([x], beam=5)[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        """Translate two fixed sentences and one random validation pair."""
        translate_one(
            'Who are we ?',
            'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars ' +
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars ' +
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words[i] for i in source])
        target = ' '.join([target_words[i] for i in target])
        translate_one(source, target)

    # Generation test
    trainer.extend(
        translate,
        trigger=(max(1, min(200, iter_per_epoch)), 'iteration'))

    # Calculate BLEU every half epoch
    if not args.no_bleu:
        trainer.extend(
            CalculateBleu(
                model, test_data, 'val/main/bleu',
                device=args.gpu, batch=max(1, args.batchsize // 4)),
            trigger=floor_step((half_epoch, 'iteration')))

    # Log
    trainer.extend(extensions.LogReport(trigger=log_trigger),
                   trigger=log_trigger)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration',
         'main/loss', 'val/main/loss',
         'main/perp', 'val/main/perp',
         'main/acc', 'val/main/acc',
         'val/main/bleu',
         'alpha',
         'elapsed_time']),
        trigger=log_trigger)

    print('start training')
    trainer.run()
for i, filter_height in enumerate(self.filter_height_list): xcs[i] = F.relu(self[i](exs)) chs[i] = F.max_pooling_2d( xcs[i], (self.max_sentence_size + 1 - filter_height)) # Convolution+Poolingの結果の結合 h = F.concat(chs, axis=2) h = F.dropout(F.tanh(self[self.convolution_num + 0](h))) y = self[self.convolution_num + 1](h) return y # 学習 en_path = os.path.join("./", "train/body.txt") source_vocab = ['<eos>', '<unk>', '<bos>'] + preprocess.count_words( en_path, 900) source_data = preprocess.make_dataset(en_path, source_vocab) source_ids = {word: index for index, word in enumerate(source_vocab)} words = {i: w for w, i in source_ids.items()} N = len(source_data) words[len(words)] = "padding" a = [0] * (1000) b = [1] * (1000) data_t = a + b max_len = 0 for k in range(len(source_data)): if max_len < len(source_data[k]): max_len = len(source_data[k]) for k in range(len(source_data)):
def main():
    """Serve a small Flask page that runs text through a trained Transformer.

    Rebuilds the source/target vocabularies from ``args.source`` and
    ``args.target``, loads the weights in ``args.model`` into
    ``net.Transformer`` and starts a development server on ``localhost``.
    POSTing a ``body`` form field renders ``index.html`` with the
    MeCab-tokenised input and the model output.
    """
    args = get_args()
    app = Flask(__name__)

    source_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(args.source, args.source_vocab)
    # NOTE(review): source_data / target_data are not used below; the calls
    # are kept in case make_dataset has side effects worth preserving.
    source_data = preprocess.make_dataset(args.source, source_vocab)
    target_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(args.target, args.target_vocab)
    target_data = preprocess.make_dataset(args.target, target_vocab)

    source_ids = {word: index for index, word in enumerate(source_vocab)}
    target_ids = {word: index for index, word in enumerate(target_vocab)}

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    model = net.Transformer(
        args.layer,
        min(len(source_ids), len(source_words)),
        min(len(target_ids), len(target_words)),
        args.unit,
        h=args.head,
        dropout=args.dropout,
        max_length=500,
        use_label_smoothing=args.use_label_smoothing,
        embed_position=args.embed_position)

    chainer.serializers.load_npz(args.model, model)

    m = MeCab('-Owakati')

    def translate_one(source):
        """Return the beam-5 model output for ``source`` as one string."""
        words = preprocess.split_sentence(source)
        # Words outside the vocabulary map to id 1 ('<unk>').
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], 'i')
        # using_config / no_backprop_mode are context managers: calling them
        # as bare statements has no effect. Entering them here applies
        # inference mode in whichever thread serves the request.
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            ys = model.translate([x], beam=5)[0]
        return ''.join(target_words[y] for y in ys)

    @app.route('/', methods=['GET', 'POST'])
    def post():
        title = '日経記事要約 by transformer'
        if request.method == 'POST':
            body = request.form['body']
            body = m.parse(normalize(body))
            abst = translate_one(body)
            return render_template('index.html',
                                   body=body, title=title, abst=abst)
        # GET, and the HEAD that Flask adds to every GET route, show the
        # empty form. The former fallback called url_for('index'), an
        # endpoint this app never registers.
        message = '日経に関連する記事を入力してください'
        return render_template('index.html',
                               message=message, title=title)

    app.debug = True
    app.run(host='localhost')
import tensorflow as tf
import tensorflow.keras as K
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import model_from_json
from preprocess import make_dataset
from model import create_model
from keras.optimizers import Adam

# Build the character / bigram datasets and their vocabularies.
(train_x_char, train_x_bigram, train_y,
 test_x_char, test_x_bigram, test_y,
 dev_x_char, dev_y_char, dev_x_bigram, dev_y_bigram,
 char_vocabulary, bigram_vocabulary, MAX_LENGTH) = make_dataset(
    'output.utf8', 'dataset.txt')

# Training hyper-parameters.
batch_size = 64
epochs = 3
hidden_size = 100
embedding_size_char = 32
embedding_size_bigram = 64

# Write both vocabularies to disk as "<entry>:<index>" lines so that the
# prediction script can load them and check its input against them.
# The files are closed (and therefore flushed) as soon as they are written;
# previously they were left open for the rest of the run.
with open('char_vocabulary.txt', 'w') as fv, \
        open('bi_vocabulary.txt', 'w') as fb:
    for word in char_vocabulary:
        fv.write(word + ":" + str(char_vocabulary[word]) + '\n')
    for word in bigram_vocabulary:
        fb.write(word + ":" + str(bigram_vocabulary[word]) + '\n')

model = create_model(len(char_vocabulary), len(bigram_vocabulary),
                     hidden_size, MAX_LENGTH,
                     train_x_char.shape, train_x_bigram.shape)

opt = Adam(lr=1e-3, decay=1e-3 / 200)
def __init__(self, fpath, IsTest=False):
    """Load ``fpath`` and keep only sentences with at least one non-'O' tag.

    Each sentence's tags are passed through ``tag2idx[0]`` and back through
    ``idx2tag[0]``; sentences whose resulting tags are all ``'O'`` are
    dropped and both counts are printed. With ``params.dummy_run`` set only
    the first kept sentence is used. The result is stored in ``self.sents``
    (wrapped in ``[CLS]``/``[SEP]``), ``self.ids`` (one single-element list
    per sentence) and ``self.tags_li`` (one list of ``<PAD>``-wrapped label
    sequences per task).

    Args:
        fpath: Passed straight to ``make_dataset``.
        IsTest: Use ``make_bert_testset`` instead of ``make_bert_dataset``.
    """
    dataset = make_dataset(fpath)
    build = make_bert_testset if IsTest else make_bert_dataset
    article_words, article_tags, article_ids = build(dataset)

    # Flatten the article-wise grouping into a list of sentences, dropping
    # the ones that carry no label at all.
    kept = []
    dropped = 0
    for art_w, art_t, art_i in zip(article_words, article_tags, article_ids):
        for sentence, raw_tags, sent_id in zip(art_w, art_t, art_i):
            mapped = [idx2tag[0][tag2idx[0][raw]] for raw in raw_tags]
            if set(mapped) == {'O'}:
                dropped += 1
            else:
                kept.append((sentence, mapped, sent_id))
    print("{} sentences dropped".format(dropped))
    print("{} sentences NOT dropped".format(len(kept)))

    sents, sent_ids = [], []
    tags_li = [[] for _ in range(num_task)]
    if params.dummy_run:
        kept = [kept[0]]

    for tokens, labels, sent_id in kept:
        sent_ids.append([sent_id])
        sents.append(["[CLS]"] + tokens + ["[SEP]"])

        # Build the per-task label sequences and add padding labels for the
        # [CLS] and [SEP] positions.
        if num_task != 2:
            for task in range(num_task):
                task_labels = [
                    label if label != 'O' and label in ["CD", "ST"] else 'O'
                    for label in labels
                ]
                tags_li[task].append(["<PAD>"] + task_labels + ["<PAD>"])
        else:
            # Task 0: token-level labels. Task 1: a single sentence-level
            # label that is 'Prop' as soon as any token label survived.
            token_labels = [
                label if label != 'O' and label in VOCAB[0] else 'O'
                for label in labels
            ]
            if any(label != 'O' for label in token_labels):
                sentence_label = ['Prop']
            else:
                sentence_label = ['Non-prop']
            per_task = [token_labels, sentence_label]
            for task in range(num_task):
                tags_li[task].append(["<PAD>"] + per_task[task] + ["<PAD>"])

    self.sents, self.ids, self.tags_li = sents, sent_ids, tags_li
    assert len(sents) == len(sent_ids) == len(tags_li[0])
def main():
    """Train the ``Att`` summarisation model with Chainer's ``Trainer``.

    Parses the command line, builds 18000-word vocabularies and id-encoded
    source/target pairs with ``preprocess``, creates the model and trainer,
    and, when validation files are given, registers sample generation
    (``translate_one``), optional full generation (``translate_all``, gated
    by ``TRANS_ALL``) and an ``Evaluator``. A snapshot is saved every 10
    epochs; ``--resume`` restores the trainer from such a snapshot.
    """
    parser = argparse.ArgumentParser(
        description='Chainer example: Att_summary')
    parser.add_argument('--source', '-s', type=str,
                        default='test/body.txt',
                        help='source sentence list')
    parser.add_argument('--target', '-t', type=str,
                        default='test/sum.txt',
                        help='target sentence list')
    parser.add_argument('--source_valid', type=str,
                        default='test/body.txt',
                        help='source sentence list for validation')
    parser.add_argument('--target_valid', type=str,
                        default='test/sum.txt',
                        help='target sentence list for validation')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=200,
                        help='number of units')
    parser.add_argument('--layer', '-l', type=int, default=1,
                        help='number of layers')
    parser.add_argument('--log_interval', type=int, default=20,
                        help='number of iteration to show log')
    parser.add_argument('--validation_interval', type=int, default=20,
                        help='number of iteration to evlauate the model '
                        'with validation dataset')
    parser.add_argument('--out', '-o', default='result',
                        help='directory to output the result')
    args = parser.parse_args()

    # Load pre-processed dataset
    print('[{}] Loading dataset... (this may take several minutes)'.format(
        datetime.datetime.now()))

    en_path = os.path.join("./", args.source)
    # The second argument of count_words is the vocabulary size.
    source_vocab = ['<eos>', '<unk>', '<bos>'] + preprocess.count_words(
        en_path, 18000)
    source_data = preprocess.make_dataset(en_path, source_vocab)
    fr_path = os.path.join("./", args.target)
    target_vocab = ['<eos>', '<unk>', '<bos>'] + preprocess.count_words(
        fr_path, 18000)
    target_data = preprocess.make_dataset(fr_path, target_vocab)
    assert len(source_data) == len(target_data)
    print('Original training data size: %d' % len(source_data))
    train_data = list(six.moves.zip(source_data, target_data))
    print('Filtered training data size: %d' % len(train_data))

    source_ids = {word: index for index, word in enumerate(source_vocab)}
    target_ids = {word: index for index, word in enumerate(target_vocab)}
    # Inverted mappings: id -> word.
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    # Setup model
    stack = 1
    width = 3
    model = Att(args.layer, len(source_ids), len(target_ids), args.unit,
                args.batchsize, WEIGHT, stack, width)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    # Setup optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Setup iterator
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)

    # Setup updater and trainer
    updater = training.StandardUpdater(
        train_iter, optimizer,
        converter=seq2seq_pad_concat_convert, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'validation/main/loss',
         'elapsed_time']),
        trigger=(1, 'epoch'))

    # Everything that needs validation data lives inside this branch so
    # that test_data is never referenced without having been built.
    if args.source_valid and args.target_valid:
        en_path = os.path.join("./", args.source_valid)
        source_data = preprocess.make_dataset(en_path, source_vocab)
        fr_path = os.path.join("./", args.target_valid)
        target_data = preprocess.make_dataset(fr_path, target_vocab)
        assert len(source_data) == len(target_data)
        test_data = list(six.moves.zip(source_data, target_data))
        test_source_unknown = calculate_unknown_ratio(
            [s for s, _ in test_data])
        test_target_unknown = calculate_unknown_ratio(
            [t for _, t in test_data])

        print('Validation data: %d' % len(test_data))
        print('Validation source unknown ratio: %.2f%%' %
              (test_source_unknown * 100))
        print('Validation target unknown ratio: %.2f%%' %
              (test_target_unknown * 100))

        @chainer.training.make_extension()
        def translate_one(trainer):
            """Generate from one fixed training pair and print the result."""
            # Generation on a training sentence (index 21). To sample from
            # the validation set instead, pick
            # test_data[numpy.random.choice(len(test_data))] and call
            # model.translate([model.xp.array(source)]).
            source, target = train_data[21]
            result = model.translate(source)[0]

            source_sentence = ' '.join([source_words[x] for x in source])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            print('# source : ' + source_sentence)
            print('# result : ' + result_sentence)
            print('# expect : ' + target_sentence)
            if WEIGHT:
                with open("weight/wei.txt", "a", encoding="utf-8") as f:
                    f.write("<body> <fos> " + str(source_sentence) + "\n")
                    f.write("<generation> <fos> " + str(result_sentence) +
                            "\n")

        def translate_all(trainer):
            """Append a generated line per validation source, then exit."""
            # Open the output once instead of once per sentence.
            with open("summary/result.txt", "a", encoding="utf-8") as f:
                for source, _ in test_data:
                    result = model.translate(source)[0]
                    result_sentence = ' '.join(
                        [target_words[y] for y in result])
                    f.write(str(result_sentence) + "\n")
            # Deliberately stops the whole run once generation is done.
            sys.exit(0)

        trainer.extend(translate_one,
                       trigger=(args.validation_interval, 'epoch'))
        if TRANS_ALL:
            trainer.extend(translate_all, trigger=(20, 'epoch'))

        test_iter = chainer.iterators.SerialIterator(
            test_data, args.batchsize, False, False)
        trainer.extend(extensions.Evaluator(
            test_iter, model,
            converter=seq2seq_pad_concat_convert, device=args.gpu))

    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))

    # --resume used to be parsed and then ignored; restore the trainer
    # state from the given snapshot file when it is supplied.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    print('start training')
    trainer.run()
def main():
    """Interactively translate sentences with a trained Transformer.

    Rebuilds the vocabularies from the training files, loads the weights
    given by ``--model`` and then repeatedly prompts for a source sentence
    and its expected translation, printing the beam-5 output next to it.
    The loop ends cleanly on end-of-input (Ctrl-D) or Ctrl-C.
    """
    parser = argparse.ArgumentParser(
        description='Chainer example: convolutional seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=48,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', type=int, default=512,
                        help='Number of units')
    parser.add_argument('--layer', '-l', type=int, default=6,
                        help='Number of layers')
    parser.add_argument('--head', type=int, default=8,
                        help='Number of heads in attention mechanism')
    parser.add_argument('--dropout', '-d', type=float, default=0.1,
                        help='Dropout rate')
    parser.add_argument('--model', type=str,
                        help='trained model')
    parser.add_argument('--input', '-i', type=str, default='./',
                        help='Input directory')
    parser.add_argument('--source', '-s', type=str,
                        default='europarl-v7.fr-en.en',
                        help='Filename of train data for source language')
    parser.add_argument('--target', '-t', type=str,
                        default='europarl-v7.fr-en.fr',
                        help='Filename of train data for target language')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--source-vocab', type=int, default=40000,
                        help='Vocabulary size of source language')
    parser.add_argument('--target-vocab', type=int, default=40000,
                        help='Vocabulary size of target language')
    parser.add_argument('--no-bleu', '-no-bleu', action='store_true',
                        help='Skip BLEU calculation')
    parser.add_argument('--use-label-smoothing', action='store_true',
                        help='Use label smoothing for cross entropy')
    parser.add_argument('--embed-position', action='store_true',
                        help='Use position embedding rather than sinusoid')
    parser.add_argument('--use-fixed-lr', action='store_true',
                        help='Use fixed learning rate rather than the ' +
                             'annealing proposed in the paper')
    parser.add_argument('--disable-mecab', '--dm', action='store_true',
                        help='disalbe mecab toknize')
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=4))

    # Check file
    en_path = os.path.join(args.input, args.source)
    source_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(en_path, args.source_vocab)
    # NOTE(review): source_data is not used below; the call is kept in case
    # make_dataset has side effects worth preserving.
    source_data = preprocess.make_dataset(en_path, source_vocab)
    fr_path = os.path.join(args.input, args.target)
    target_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(fr_path, args.target_vocab)

    source_ids = {word: index for index, word in enumerate(source_vocab)}
    target_ids = {word: index for index, word in enumerate(target_vocab)}

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    # Only build the tokenizer when it will actually be used, so that
    # --disable-mecab does not require it to be constructible.
    m = None if args.disable_mecab else MeCab('-Owakati')

    # Define Model
    model = net.Transformer(
        args.layer,
        min(len(source_ids), len(source_words)),
        min(len(target_ids), len(target_words)),
        args.unit,
        h=args.head,
        dropout=args.dropout,
        max_length=500,
        use_label_smoothing=args.use_label_smoothing,
        embed_position=args.embed_position)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    chainer.serializers.load_npz(args.model, model)

    def translate_one(source, target):
        """Print the beam-5 translation of ``source`` next to ``target``."""
        words = preprocess.split_sentence(source)
        print('# source : ' + ' '.join(words))
        # Words outside the vocabulary map to id 1 ('<unk>').
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], 'i')
        ys = model.translate([x], beam=5)[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    def tokenize(source, target):
        """Run both strings through MeCab unless it is disabled."""
        if args.disable_mecab:
            return source, target
        return m.parse(source), m.parse(target)

    while True:
        try:
            source = input('source> ')
            target = input('target> ')
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: finish the prompt line and stop
            # instead of dying with a traceback.
            print()
            break
        source, target = tokenize(source, target)
        translate_one(source, target)
tf.config.experimental.set_virtual_device_configuration( gpus[0], [ tf.config.experimental.VirtualDeviceConfiguration( memory_limit=8192) ]) except RuntimeError as e: print(e) if __name__ == "__main__": n_num_intvl = 3 res = [] for j in range(n_num_intvl): Config.NUM_INTVL = (j + 1) * 5 print(f'{Config.NAME} {Config.UNIT_INTVL} {2*Config.NUM_INTVL}') Config.MODELNAME = f"models/{Config.NAME}unit{int(Config.UNIT_INTVL*1000)}_num{Config.NUM_INTVL}.h5" make_dataset(Config.FILENAME) cnn = CNN(Config.MODELNAME) cnn.train() res.append(cnn.test()) shutil.rmtree(Config.DATAPATH) np.save(f"results/{Config.NAME}_{int(Config.UNIT_INTVL*1000)}", res) # Config.UNIT_INTVL = 10/1000 # Config.NUM_INTVL = 5 # print(f'{Config.NAME} {Config.UNIT_INTVL} {2*Config.NUM_INTVL}') # Config.MODELNAME = f"models/{Config.NAME}unit{int(Config.UNIT_INTVL*1000)}_num{Config.NUM_INTVL}.h5" # make_dataset(Config.FILENAME) # cnn = CNN(Config.MODELNAME) # cnn.train() # np.save(f"results/RPM_10_{5}", [cnn.test()]) # shutil.rmtree(Config.DATAPATH)