def run_online(device):
    # predict labels online for tab-separated sentence pairs read from stdin
    for l in sys.stdin:
        l = l.strip()
        l_lst = l.split('\t')
        if not l or len(l_lst) < 2:
            print('# blank line')
            continue
        text1 = nlp_utils.normalize_text(l_lst[0])
        text2 = nlp_utils.normalize_text(l_lst[1])
        words1 = nlp_utils.split_text(text1, char_based=setup['char_based'])
        words2 = nlp_utils.split_text(text2, char_based=setup['char_based'])
        xs = nlp_utils.transform_to_array2([[words1, words2]], vocab,
                                           with_label=False)
        xs = nlp_utils.convert_seq(xs, device=device, with_label=False)
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            prob = model.predict(xs['xs1'], xs['xs2'], softmax=True)[0]
        answer = int(model.xp.argmax(prob))
        score = float(prob[answer])
        print('{}\t{:.4f}\t{}\t{}'.format(
            answer, score, ' '.join(words1), ' '.join(words2)))
def predict_batch(words_batch):
    xs = nlp_utils.transform_to_array(words_batch, vocab, with_label=False)
    xs = nlp_utils.convert_seq(xs, device=device, with_label=False)
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        probs = model.predict(xs, softmax=True)
    answers = model.xp.argmax(probs, axis=1)
    scores = probs[model.xp.arange(answers.size), answers].tolist()
    for words, answer, score in zip(words_batch, answers, scores):
        print('{}\t{:.4f}\t{}'.format(answer, score, ' '.join(words)))
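# The driver below is a minimal sketch, not part of the original snippet: it
# assumes the same module-level `nlp_utils`, `setup`, `vocab`, and `model`
# globals that predict_batch relies on, buffers stdin lines into mini-batches,
# and flushes whatever remains at EOF.
def run_batch(device, batchsize=64):
    batch = []
    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        text = nlp_utils.normalize_text(line)
        words = nlp_utils.split_text(text, char_based=setup['char_based'])
        batch.append(words)
        if len(batch) >= batchsize:
            predict_batch(batch)
            batch = []
    if batch:
        predict_batch(batch)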
def predict_fn(input_data, model):
    """
    This function receives a NumPy array and makes a prediction on it using the
    model returned by `model_fn`.

    The default predictor used by `Chainer` serializes input data to the 'npy'
    format: https://docs.scipy.org/doc/numpy-1.14.0/neps/npy-format.html

    The Chainer container provides an overridable pre-processing function
    `input_fn` that accepts the serialized input data and deserializes it into a
    NumPy array. `input_fn` is invoked before `predict_fn` and passes its return
    value to this function (as `input_data`).

    The Chainer container provides an overridable post-processing function
    `output_fn` that accepts this function's return value and serializes it back
    into `npy` format, which the Chainer predictor can deserialize back into a
    NumPy array on the client.

    Args:
        input_data: a NumPy array containing the data serialized by the Chainer predictor
        model: the return value of `model_fn`

    Returns:
        a NumPy array containing predictions which will be returned to the client

    For more on `input_fn`, `predict_fn` and `output_fn`, please visit the
    sagemaker-python-sdk repository: https://github.com/aws/sagemaker-python-sdk

    For more on the Chainer container, please visit the
    sagemaker-chainer-containers repository:
    https://github.com/aws/sagemaker-chainer-containers
    """
    trained_model, vocab = model
    words_batch = []
    for sentence in input_data.tolist():
        text = normalize_text(sentence)
        words = split_text(text)
        words_batch.append(words)

    xs = transform_to_array(words_batch, vocab, with_label=False)
    xs = convert_seq(xs, with_label=False)

    with chainer.using_config('train', False), chainer.no_backprop_mode():
        probs = trained_model.predict(xs, softmax=True)
    answers = trained_model.xp.argmax(probs, axis=1)
    scores = probs[trained_model.xp.arange(answers.size), answers].tolist()

    output = []
    for words, answer, score in zip(words_batch, answers, scores):
        output.append([' '.join(words), answer, score])
    return np.array(output)
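# For context: `predict_fn` above unpacks `model` into `(trained_model, vocab)`,
# so the companion `model_fn` must return such a pair. The sketch below is an
# assumption-laden illustration, not the original code; the artifact names
# ('vocab.json', 'model.npz') and the `build_net` constructor are hypothetical.
import json
import os

import chainer


def model_fn(model_dir):
    """Load the artifacts saved by training and return (trained_model, vocab)."""
    with open(os.path.join(model_dir, 'vocab.json')) as f:    # assumed file name
        vocab = json.load(f)
    trained_model = build_net(n_vocab=len(vocab))             # hypothetical constructor
    chainer.serializers.load_npz(
        os.path.join(model_dir, 'model.npz'), trained_model)  # assumed file name
    return trained_model, vocab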
def run_online(device):
    # predict labels online for sentences read from stdin
    for l in sys.stdin:
        l = l.strip()
        if not l:
            print('# blank line')
            continue
        text = nlp_utils.normalize_text(l)
        words = nlp_utils.split_text(text, char_based=setup['char_based'])
        xs = nlp_utils.transform_to_array([words], vocab, with_label=False)
        xs = nlp_utils.convert_seq(xs, device=device, with_label=False)
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            prob = model.predict(xs, softmax=True)[0]
        answer = int(model.xp.argmax(prob))
        score = float(prob[answer])
        print('{}\t{:.4f}\t{}'.format(answer, score, ' '.join(words)))
def run_online(gpu):
    # predict labels online for sentences read from stdin
    for l in sys.stdin:
        l = l.strip()
        if not l:
            print('# blank line')
            continue
        text = nlp_utils.normalize_text(l)
        words = nlp_utils.split_text(text, char_based=setup['char_based'])
        xs = nlp_utils.transform_to_array([words], vocab, with_label=False)
        xs = nlp_utils.convert_seq(xs, device=gpu, with_label=False)
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            prob = model.predict(xs, softmax=True)[0]
        answer = int(model.xp.argmax(prob))
        score = float(prob[answer])
        print('{}\t{:.4f}\t{}'.format(answer, score, ' '.join(words)))
def main():
    parser = argparse.ArgumentParser(
        description='Music generation with an RNN')
    parser.add_argument('--batchsize', '-b', type=int, default=256,
                        help='Number of sequences in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=1,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--interval', '-i', type=int, default=10,
                        help='Interval (in iterations) for the progress bar and reports')
    parser.add_argument('--vec', '-v', type=int, default=32,
                        help='Dimension of the hidden layer')
    parser.add_argument('--layer', '-l', type=int, default=2,
                        help='Number of layers')
    parser.add_argument('--frequency', '-f', type=int, default=400,
                        help='Snapshot save frequency')
    parser.add_argument('--model', '-model', default='LSTM',
                        choices=['LSTM', 'Word2Vec'],
                        help='Name of encoder model type.')
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=2))

    model = getattr(mymodel, args.model)(481, args.vec, args.layer)
    # Use the GPU if one is available
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up the optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Set up the dataset
    trans = Trans()
    index = trans.getindex()
    train, val = chainer.datasets.split_dataset_random(
        index, int(len(index) * 0.8), seed=0)  # hold out 20% for validation
    train = chainer.datasets.TransformDataset(train, trans)
    val = chainer.datasets.TransformDataset(val, trans)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val, args.batchsize,
                                                repeat=False, shuffle=False)

    # How long to train
    stop_trigger = (args.epoch, 'epoch')

    # Set up the updater and trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer,
        converter=(lambda x, y: tuple(convert_seq(x, y).values())),
        device=args.gpu, loss_func=model.lossfunc)
    trainer = training.Trainer(updater, stop_trigger)

    # Configure evaluation on the validation data
    evaluator = MyEvaluator(val_iter, model, device=args.gpu,
                            eval_func=model.lossfunc)
    evaluator.trigger = 1, 'epoch'
    # trainer.extend(evaluator)

    # Keep the pretrained part frozen (backward still seems to run?)
    if args.model == 6 or args.model == 7:
        model.base.disable_update()

    # Save snapshots (weights during training)
    frequency = args.frequency
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'iteration'))

    # How often to log the evaluation on the training data
    logreport = extensions.LogReport(trigger=(args.interval, 'iteration'))
    trainer.extend(logreport)
    model.logreport = logreport

    # Save plots of the evaluation on each dataset
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'val/loss'], 'iteration',
                trigger=(10, 'iteration'), file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/acc', 'val/acc'], 'iteration',
                trigger=(10, 'iteration'), file_name='accuracy.png'))

    # Which columns to print when reporting the evaluation
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss_r', 'main/loss', 'val/loss',
         'main/acc', 'elapsed_time']))

    # Progress bar settings
    trainer.extend(extensions.ProgressBar(update_interval=args.interval))

    # Settings for resuming from a trained model
    # if args.resume:
    #     chainer.serializers.load_npz(args.resume, model, path='updater/model:main/')
    #     # For some reason loading fails without the path; cause unknown
    # setw(model)

    trainer.run()

    print("save resume")
    chainer.serializers.save_npz("resume.npz", model)
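# Side note (illustration only, not part of the original code): the `converter`
# lambda passed to StandardUpdater above assumes this project's `convert_seq`
# returns a dict such as {'xs': ..., 'ys': ...}; taking `tuple(...values())`
# turns that dict into positional arguments, so the updater effectively calls
# model.lossfunc(xs, ys).
def converter(batch, device):
    d = convert_seq(batch, device)  # assumed to be {'xs': ..., 'ys': ...}
    return tuple(d.values())        # -> (xs, ys), unpacked into loss_func(*args)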
def main():
    parser = argparse.ArgumentParser(
        description='Document Classification Example')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of documents in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=30,
                        help='Number of training epochs')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=200,
                        help='Number of units')
    parser.add_argument('--vocab', '-v', type=int, default=50000,
                        help='Vocabulary size')
    parser.add_argument('--layer', '-l', type=int, default=1,
                        help='Number of LSTM layers')
    parser.add_argument('--dropout', '-d', type=float, default=0.4,
                        help='Dropout rate')
    parser.add_argument('--gradclip', type=float, default=5,
                        help='Gradient clipping threshold')
    parser.add_argument('--train_file', '-train', default='data/train.seg.csv',
                        help='Training data file.')
    parser.add_argument('--test_file', '-test', default='data/test.seg.csv',
                        help='Test data file.')
    parser.add_argument('--model', '-m',
                        help='read model parameters from npz file')
    parser.add_argument('--vcb_file',
                        default='/mnt/gold/users/s18153/prjPyCharm/prjNLP_GPU/data/vocab_train_w_NoReplace.vocab_file',
                        help='Vocabulary data file.')
    args = parser.parse_args()

    # Check whether args.vcb_file exists (load it if it was already created)
    if os.path.exists(args.vcb_file):
        with open(args.vcb_file, 'rb') as f_vocab_data:
            train_val = pickle.load(f_vocab_data)
    else:
        # make vocab from training data
        train_val = data.DocDataset(args.train_file, vocab_size=args.vocab)
        with open(args.vcb_file, 'wb') as f_vocab_save:
            pickle.dump(train_val, f_vocab_save)

    # Earlier variants kept for reference:
    # train_val = data.DocDataset(args.train_file, vocab_size=args.vocab)  # make vocab from training data
    # test = [x[0] for x in data.DocDataset(args.test_file, train_val.get_vocab())]
    #     # [ doc1[sent1[], sent2[], ...], doc2[sent1[], sent2[], ...], ... ]
    # test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)

    # Fetch documents and labels together:
    # [ doc1[sent1[], sent2[], ...], doc2[sent1[], sent2[], ...], ... ]
    # test_doc_label = [x for x in data.DocDataset(args.test_file, train_val.get_vocab())]
    test_doc_label = data.DocDataset(args.test_file, train_val.get_vocab())
    test_doc = [x[0] for x in test_doc_label]
    test_label = [x[1] for x in test_doc_label]
    test_iter = iterators.SerialIterator(test_doc, args.batchsize,
                                         repeat=False, shuffle=False)
    test_label_iter = iterators.SerialIterator(test_label, args.batchsize,
                                               repeat=False, shuffle=False)
    # test_doc_label_iter = iterators.SerialIterator(test_doc_label, args.batchsize, repeat=False, shuffle=False)

    # load_npz used to fail here -> resolved?
    model = nets.DocClassify(n_vocab=args.vocab + 1, n_units=args.unit,
                             n_layers=args.layer, n_out=4, dropout=args.dropout)
if args.gpu >= 0:
    chainer.cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()

if args.model:
    serializers.load_npz(args.model, model, 'updater/model:main/predictor/')

confusion_mat = np.zeros([4, 4])  # [true label, prediction]
with chainer.using_config('train', False):
    # Tried running the evaluation through Chainer's built-in Evaluator;
    # the outcome of that attempt is unclear, so it is left commented out.
    # test_eval = extensions.Evaluator(test_doc_label_iter, model,
    #                                  converter=convert_seq, device=args.gpu)
    # test_result = test_eval()

    # while True:
    #     result = model(convert_seq(test_iter.next(), device=args.gpu, with_label=False))
    #     test_label_batch = test_label_iter.next()
    for (label_batch, each_testinput_batch) in zip(test_label_iter, test_iter):
        result = model(convert_seq(each_testinput_batch, device=args.gpu,
                                   with_label=False))
        predict = np.argmax(result.array, axis=1)
        for (each_label, each_predict) in zip(label_batch, predict):
            confusion_mat[each_label][chainer.cuda.to_cpu(each_predict)] += 1

print(confusion_mat)

# dummy_val = 'dummy data'
time_now = datetime.now().strftime('%Y%m%d%H%M%S')
save_path = '/mnt/gold/users/s18153/prjPyCharm/prjNLP_GPU/data/vocab_train_w_NoReplace.saved_'
save_val_str = get_str_of_val_name_on_code(confusion_mat)[0]
# for (each_val, each_val_str) in zip(save_val, save_val_str):
#     with open(save_path + each_val_str + time_now, 'wb') as f_save:
#         pickle.dump(each_val, f_save)
with open(save_path + save_val_str + '_' + time_now, 'wb') as f_save:
    pickle.dump(confusion_mat, f_save)
pass  # for breakpoint
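# Small follow-up sketch (not in the original script): because confusion_mat is
# indexed as [true label, predicted label], per-class recall comes from row sums
# and per-class precision from column sums.
def per_class_metrics(confusion_mat):
    tp = np.diag(confusion_mat)
    precision = tp / np.maximum(confusion_mat.sum(axis=0), 1)  # predicted counts per class
    recall = tp / np.maximum(confusion_mat.sum(axis=1), 1)     # true counts per class
    return precision, recall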
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', required=True)
    args_dir = os.path.join(parser.parse_args().load, 'args.json')
    with open(args_dir) as f:
        args = Bunch(json.load(f))
    print(json.dumps(args.__dict__, indent=2))

    # Load a dataset
    with open(args.vocab_path) as f:
        vocab = json.load(f)
    if args.dataset == 'dbpedia':
        train, test, vocab = text_datasets.get_dbpedia(
            vocab=vocab, char_based=args.char_based)
    elif args.dataset == 'sst':
        train, test, vocab = text_datasets.get_sst(char_based=args.char_based)
    elif args.dataset.startswith('imdb.'):
        train, test, vocab = text_datasets.get_imdb(
            vocab=vocab, fine_grained=args.dataset.endswith('.fine'),
            char_based=args.char_based)
    elif args.dataset in ['TREC', 'stsa.binary', 'stsa.fine', 'custrev',
                          'mpqa', 'rt-polarity', 'subj']:
        train, test, vocab = text_datasets.get_other_text_dataset(
            args.dataset, vocab=vocab, char_based=args.char_based)

    print('# train data: {}'.format(len(train)))
    print('# test data: {}'.format(len(test)))
    print('# vocab: {}'.format(len(vocab)))
    n_class = len(set([int(d[1]) for d in train]))
    print('# class: {}'.format(n_class))

    # i_to_word = {v: k for k, v in vocab.items()}

    # FIXME
    args.batchsize = 64
    max_beam_size = 5

    # train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    if args.dataset == 'snli':
        model = nets.DoubleMaxClassifier(n_layers=args.layer, n_vocab=len(vocab),
                                         n_units=args.unit, n_class=n_class,
                                         dropout=args.dropout)
    else:
        model = nets.SingleMaxClassifier(n_layers=args.layer, n_vocab=len(vocab),
                                         n_units=args.unit, n_class=n_class,
                                         dropout=args.dropout)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU
    chainer.serializers.load_npz(args.model_path, model)

    checkpoint = []
    for batch_idx, batch in enumerate(tqdm(test_iter)):
        # if batch_idx > 10:
        #     break
        batch = convert_seq(batch, device=args.gpu)
        xs = batch['xs']
        reduced_xs, removed_indices = get_rawr(model, xs,
                                               max_beam_size=max_beam_size)
        xp = cupy.get_array_module(*xs)
        n_finals = [len(r) for r in reduced_xs]
        reduced_xs = list(itertools.chain(*reduced_xs))
        removed_indices = list(itertools.chain(*removed_indices))
        reduced_xs = [xp.asarray(x) for x in reduced_xs]
        reduced_xs = convert_seq(reduced_xs, device=args.gpu, with_label=False)
        with chainer.using_config('train', False):
            ss_0 = xp.asnumpy(model.predict(xs, softmax=True))
            ss_1 = xp.asnumpy(model.predict(reduced_xs, softmax=True))
        ys_0 = np.argmax(ss_0, axis=1)
        ys_1 = np.argmax(ss_1, axis=1)

        start = 0
        for example_idx in range(len(xs)):
            oi = xs[example_idx].tolist()   # original input
            op = int(ys_0[example_idx])     # original prediction
            oos = ss_0[example_idx]         # original output distribution
            label = int(batch['ys'][example_idx])
            checkpoint.append([])
            for i in range(start, start + n_finals[example_idx]):
                ri = reduced_xs[i].tolist()
                rp = int(ys_1[i])
                rs = ss_1[i]
                rr = removed_indices[i]
                entry = {
                    'original_input': oi,
                    'reduced_input': ri,
                    'original_prediction': op,
                    'reduced_prediction': rp,
                    'original_scores': oos,
                    'reduced_scores': rs,
                    'removed_indices': rr,
                    'label': label,
                }
                checkpoint[-1].append(entry)
            # move on to the next example's slice of the flattened reduced inputs
            start += n_finals[example_idx]

    with open(os.path.join(args.out, 'rawr_dev.pkl'), 'wb') as f:
        pickle.dump(checkpoint, f)
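# Follow-up sketch (not part of the original script): read the checkpoint written
# above and report how often the reduced input preserves the original prediction.
# The path is assumed to point at the same `args.out`/rawr_dev.pkl file.
def summarize_rawr(path):
    with open(path, 'rb') as f:
        checkpoint = pickle.load(f)
    total = sum(len(example) for example in checkpoint)
    kept = sum(1 for example in checkpoint for entry in example
               if entry['reduced_prediction'] == entry['original_prediction'])
    print('prediction preserved for {}/{} reduced inputs'.format(kept, total))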