def setup_dynet(random_seed, weight_decay, mem, cuda):
    """Configure global DyNet parameters prior to model construction.

    Returns a dict recording every setting that was applied, so the values
    can be stored alongside the model for reproducibility.

    :param random_seed: seed forwarded to ``DynetParams.set_random_seed``
    :param weight_decay: global L2 weight decay applied by DyNet
    :param mem: memory (MB) DyNet should pre-allocate
    :param cuda: if truthy, request one GPU from DyNet
    :return: dict of the parameters that were set
    """
    dynet_params = {}
    dyparams = dn.DynetParams()
    dyparams.set_random_seed(random_seed)
    dynet_params['random_seed'] = random_seed
    dyparams.set_weight_decay(weight_decay)
    dynet_params['weight_decay'] = weight_decay
    dyparams.set_autobatch(True)
    dynet_params['autobatch'] = True
    dyparams.set_mem(mem)
    dynet_params['mem'] = mem
    # Fix: `cuda` was previously accepted but silently ignored; honor it and
    # record it like every other setting.
    if cuda:
        dyparams.set_requested_gpus(1)
    dynet_params['cuda'] = cuda
    # Initialize with the given parameters
    dyparams.init()
    return dynet_params
def _create_backend(self, **kwargs):
    """Create and load the Backend for the tagger task.

    For the DyNet backend this initializes global DyNet state (GPU request,
    mandatory autobatching) and attaches a fresh ParameterCollection; for
    TensorFlow it selects the exporter class requested via ``exporter_type``.

    :raises Exception: if the DyNet backend is used without ``autobatchsz``.
    """
    backend = Backend(self.config_params.get('backend', 'tf'))
    if 'preproc' not in self.config_params:
        self.config_params['preproc'] = {}
    if backend.name == 'pytorch':
        self.config_params['preproc']['trim'] = True
    elif backend.name == 'dy':
        import _dynet
        dy_params = _dynet.DynetParams()
        dy_params.from_args()
        dy_params.set_requested_gpus(1)
        if 'autobatchsz' in self.config_params['train']:
            dy_params.set_autobatch(True)
        else:
            # Fix: the two implicitly-concatenated literals were missing a
            # separating space ("autobatching.Change").
            raise Exception('Tagger currently only supports autobatching. '
                            'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz')
        dy_params.init()
        backend.params = {'pc': _dynet.ParameterCollection(), 'batched': False}
        self.config_params['preproc']['trim'] = True
    else:
        self.config_params['preproc']['trim'] = False
        # FIXME These should be registered instead
        exporter_type = kwargs.get('exporter_type', 'default')
        if exporter_type == 'default':
            from mead.tf.exporters import TaggerTensorFlowExporter
            backend.exporter = TaggerTensorFlowExporter
        elif exporter_type == 'preproc':
            from mead.tf.preproc_exporters import TaggerTensorFlowPreProcExporter
            import mead.tf.preprocessors
            backend.exporter = TaggerTensorFlowPreProcExporter
    backend.load(self.task_name())
    return backend
def _create_backend(self, **kwargs):
    """Create and load the Backend for the tagger task.

    For the DyNet backend this initializes global DyNet state (GPU request,
    mandatory autobatching) and attaches a fresh ParameterCollection.

    :raises Exception: if the DyNet backend is used without ``autobatchsz``.
    """
    backend = Backend(self.config_params.get('backend', 'tf'))
    if 'preproc' not in self.config_params:
        self.config_params['preproc'] = {}
    if backend.name == 'pytorch':
        self.config_params['preproc']['trim'] = True
    elif backend.name == 'dy':
        import _dynet
        dy_params = _dynet.DynetParams()
        dy_params.from_args()
        dy_params.set_requested_gpus(1)
        if 'autobatchsz' in self.config_params['train']:
            dy_params.set_autobatch(True)
        else:
            # Fix: the two implicitly-concatenated literals were missing a
            # separating space ("autobatching.Change").
            raise Exception('Tagger currently only supports autobatching. '
                            'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz')
        dy_params.init()
        backend.params = {'pc': _dynet.ParameterCollection(), 'batched': False}
        self.config_params['preproc']['trim'] = True
    else:
        self.config_params['preproc']['trim'] = False
    backend.load(self.task_name())
    return backend
def _create_backend(self, **kwargs):
    """Build the Backend object for the classify task and load it.

    For the DyNet backend, global DyNet state is initialized (one GPU
    requested, autobatching enabled when ``autobatchsz`` is configured) and a
    fresh ParameterCollection is attached.  For the TensorFlow backend the
    exporter class requested via ``exporter_type`` is selected.
    """
    backend = Backend(self.config_params.get('backend', 'tf'))

    if backend.name == 'dy':
        import _dynet
        params = _dynet.DynetParams()
        params.from_args()
        params.set_requested_gpus(1)
        use_autobatch = 'autobatchsz' in self.config_params['train']
        if use_autobatch:
            self.config_params['train']['trainer_type'] = 'autobatch'
            params.set_autobatch(True)
        params.init()
        backend.params = {
            'pc': _dynet.ParameterCollection(),
            'batched': not use_autobatch,
        }
    elif backend.name == 'tf':
        # FIXME this should be registered as well!
        which = kwargs.get('exporter_type', 'default')
        if which == 'default':
            from mead.tf.exporters import ClassifyTensorFlowExporter
            backend.exporter = ClassifyTensorFlowExporter
        elif which == 'preproc':
            from mead.tf.preproc_exporters import ClassifyTensorFlowPreProcExporter
            import mead.tf.preprocessors
            backend.exporter = ClassifyTensorFlowPreProcExporter

    backend.load(self.task_name())
    return backend
def _create_backend(self, **kwargs):
    """Create and load the Backend for this task.

    Sets backend-specific preprocessing flags in
    ``config_params['preproc']`` and, for DyNet, initializes global DyNet
    state and attaches a ParameterCollection.
    """
    backend = Backend(self.config_params.get('backend', 'tf'))
    if 'preproc' not in self.config_params:
        self.config_params['preproc'] = {}
    # Fix: the original assigned the undefined name `show_examples` here,
    # which raised NameError on every call; source the value from kwargs
    # instead (None when the caller does not supply one).
    self.config_params['preproc']['show_ex'] = kwargs.get('show_ex')
    if backend.name == 'pytorch':
        self.config_params['preproc']['trim'] = True
    elif backend.name == 'dy':
        import _dynet
        dy_params = _dynet.DynetParams()
        dy_params.from_args()
        dy_params.set_requested_gpus(1)
        if 'autobatchsz' in self.config_params['train']:
            self.config_params['train']['trainer_type'] = 'autobatch'
            dy_params.set_autobatch(True)
            batched = False
        else:
            batched = True
        dy_params.init()
        backend.params = {'pc': _dynet.ParameterCollection(), 'batched': batched}
        self.config_params['preproc']['trim'] = True
    else:
        # NOTE(review): every branch sets trim=True here — looks intentional
        # for this task, but confirm the tf branch really wants trimming.
        self.config_params['preproc']['trim'] = True
    backend.load(self.task_name())
    return backend
def init_dynet(args):
    """Initialize global DyNet state.

    Picks up any ``--dynet-*`` flags from the command line, applies the
    random seed from ``args``, then commits the configuration.
    """
    params = dynet.DynetParams()
    # Fetch the command line arguments (optional)
    params.from_args()
    # Seed DyNet's RNG from our own CLI options
    params.set_random_seed(args.seed)
    # Commit the configuration
    params.init()
def _setup_task(self):
    """Bind the classify task to the configured backend.

    Imports the backend-specific ``classify`` implementation, installs the
    vector-allocation / reversal preprocessing hooks, and selects the text
    clean-up function requested in the config.

    Fix: the keras/dynet/tensorflow selection used to be a *separate*
    if-statement after the pytorch check, so a pytorch configuration fell
    through into the TensorFlow else-branch and had its ``classify`` module
    and ``rev2nd`` clobbered.  The chain is now properly nested.
    """
    backend = self.config_params.get('backend', 'tensorflow')
    if backend == 'pytorch':
        print('PyTorch backend')
        from baseline.pytorch import long_0_tensor_alloc
        from baseline.pytorch import tensor_reverse_2nd as rev2nd
        import baseline.pytorch.classify as classify
        self.config_params['preproc']['vec_alloc'] = long_0_tensor_alloc
    else:
        # All non-pytorch backends use plain numpy allocation.
        self.config_params['preproc']['vec_alloc'] = np.zeros
        if backend == 'keras':
            print('Keras backend')
            import baseline.keras.classify as classify
            from baseline.data import reverse_2nd as rev2nd
        elif backend == 'dynet':
            print('Dynet backend')
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            if 'autobatchsz' in self.config_params['train']:
                self.config_params['model']['batched'] = False
                dy_params.set_autobatch(True)
            dy_params.init()
            import baseline.dy.classify as classify
            from baseline.data import reverse_2nd as rev2nd
            self.config_params['preproc']['trim'] = True
        else:
            print('TensorFlow backend')
            import baseline.tf.classify as classify
            from baseline.data import reverse_2nd as rev2nd
            from mead.tf.exporters import ClassifyTensorFlowExporter
            self.ExporterType = ClassifyTensorFlowExporter
    self.task = classify
    # Text normalization: full clean, lowercasing, or none.
    if self.config_params['preproc'].get('clean', False) is True:
        self.config_params['preproc']['clean_fn'] = baseline.TSVSeqLabelReader.do_clean
        print('Clean')
    elif self.config_params['preproc'].get('lower', False) is True:
        self.config_params['preproc']['clean_fn'] = baseline.lowercase
        print('Lower')
    else:
        self.config_params['preproc']['clean_fn'] = None
    # Optionally reverse the token sequence (2nd axis) before vectorization.
    self.config_params['preproc']['src_vec_trans'] = rev2nd if self.config_params['preproc'].get(
        'rev', False) else None
def _setup_task(self):
    """Bind the tagger task to the configured backend.

    Imports the backend-specific ``tagger`` implementation, installs the
    vector allocation/shape/trim preprocessing hooks, and selects the word
    normalization function requested in the config.

    :raises Exception: if the DyNet backend is used without ``autobatchsz``.
    """
    backend = self.config_params.get('backend', 'tensorflow')
    if backend == 'pytorch':
        print('PyTorch backend')
        from baseline.pytorch import long_0_tensor_alloc as vec_alloc
        from baseline.pytorch import tensor_shape as vec_shape
        import baseline.pytorch.tagger as tagger
        self.config_params['preproc']['vec_alloc'] = vec_alloc
        self.config_params['preproc']['vec_shape'] = vec_shape
        self.config_params['preproc']['trim'] = True
    elif backend == 'dynet':
        print('Dynet backend')
        import _dynet
        dy_params = _dynet.DynetParams()
        dy_params.from_args()
        dy_params.set_requested_gpus(1)
        if 'autobatchsz' in self.config_params['train']:
            self.config_params['model']['batched'] = False
            dy_params.set_autobatch(True)
        else:
            # Fix: the two adjacent string literals were missing a separating
            # space, producing "autobatching.Change" in the message.
            raise Exception(
                'Tagger currently only supports autobatching. '
                'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz'
            )
        dy_params.init()
        import baseline.dy.tagger as tagger
        self.config_params['preproc']['vec_alloc'] = np.zeros
        self.config_params['preproc']['vec_shape'] = np.shape
        self.config_params['preproc']['trim'] = True
    else:
        self.config_params['preproc']['vec_alloc'] = np.zeros
        self.config_params['preproc']['vec_shape'] = np.shape
        print('TensorFlow backend')
        self.config_params['preproc']['trim'] = False
        import baseline.tf.tagger as tagger
        import mead.tf
        self.ExporterType = mead.tf.TaggerTensorFlowExporter
    self.task = tagger
    # Word normalization: web-style cleanup, lowercasing, or none.
    if self.config_params['preproc'].get('web-cleanup', False) is True:
        self.config_params['preproc']['word_trans_fn'] = baseline.CONLLSeqReader.web_cleanup
        print('Web-ish data cleanup')
    elif self.config_params['preproc'].get('lower', False) is True:
        self.config_params['preproc']['word_trans_fn'] = baseline.lowercase
        print('Lower')
    else:
        self.config_params['preproc']['word_trans_fn'] = None
def _setup_task(self):
    """Bind the seq2seq task to the configured backend.

    Fix: the dynet/tensorflow selection used to be a *separate* if-statement
    after the pytorch check, so a pytorch configuration fell through into the
    TensorFlow else-branch and had ``seq2seq``, ``show_ex`` and
    ``ExporterType`` clobbered.  The chain is now properly nested.
    """
    # If its not vanilla seq2seq, dont bother reversing
    do_reverse = self.config_params['model']['model_type'] == 'default'
    backend = self.config_params.get('backend', 'tensorflow')
    if backend == 'pytorch':
        print('PyTorch backend')
        from baseline.pytorch import long_0_tensor_alloc as vec_alloc
        from baseline.pytorch import tensor_shape as vec_shape
        from baseline.pytorch import tensor_reverse_2nd as rev2nd
        import baseline.pytorch.seq2seq as seq2seq
        self.config_params['preproc']['vec_alloc'] = vec_alloc
        self.config_params['preproc']['vec_shape'] = vec_shape
        src_vec_trans = rev2nd if do_reverse else None
        self.config_params['preproc']['word_trans_fn'] = src_vec_trans
        self.config_params['preproc']['show_ex'] = baseline.pytorch.show_examples_pytorch
        self.config_params['preproc']['trim'] = True
    else:
        self.config_params['preproc']['vec_alloc'] = np.zeros
        self.config_params['preproc']['vec_shape'] = np.shape
        self.config_params['preproc']['trim'] = False
        src_vec_trans = baseline.reverse_2nd if do_reverse else None
        self.config_params['preproc']['word_trans_fn'] = src_vec_trans
        if backend == 'dynet':
            print('Dynet backend')
            import _dynet
            # DyNet requires trimmed batches despite the default above.
            self.config_params['preproc']['trim'] = True
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            dy_params.init()
            import baseline.dy.seq2seq as seq2seq
            self.config_params['preproc']['show_ex'] = baseline.dy.show_examples_dynet
        else:
            import baseline.tf.seq2seq as seq2seq
            self.config_params['preproc']['show_ex'] = baseline.tf.show_examples_tf
            from mead.tf.exporters import Seq2SeqTensorFlowExporter
            self.ExporterType = Seq2SeqTensorFlowExporter
    self.task = seq2seq
def init(opts):
    """Initialize the DyNet backend from an options dict.

    Temporarily rewrites ``sys.argv`` so that ``DynetParams.from_args()``
    picks up the ``--dynet-*`` flags built from *opts*, then restores the
    original argv.  On CPU-only device strings, rebinds the module-level
    ``topk``/``count_larger`` helpers to their numpy implementations.
    """
    # todo: manipulating sys.argv
    utils.zlog("Using BACKEND of DYNET on %s." % (opts["dynet-devices"], ))
    params = dy.DynetParams()
    temp = sys.argv  # save the real argv; restored after from_args()
    # Synthetic argv carrying only the --dynet-* options.
    sys.argv = [
        temp[0], "--dynet-mem", opts["dynet-mem"], "--dynet-autobatch",
        opts["dynet-autobatch"], "--dynet-devices", opts["dynet-devices"],
        "--dynet-seed", opts["dynet-seed"]
    ]
    DY_CONFIG.immediate_compute = opts["dynet-immed"]
    params.from_args(None)  # parses the synthetic argv set above
    params.init()
    sys.argv = temp  # restore the caller's argv
    if "GPU" not in opts["dynet-devices"]:
        # CPU path: swap in the numpy-based module-level helpers.
        global topk
        topk = topk_cpu
        global count_larger
        count_larger = cl_cpu
        utils.zlog("Currently using numpy for topk_cpu/count_larger.")
def _setup_task(self):
    """Bind the language-model task to the configured backend.

    Fix: the dynet/tensorflow selection used to be a *separate* if-statement
    after the pytorch check, so a pytorch configuration fell through into the
    TensorFlow else-branch, which overwrote ``trim`` (True -> False) and
    clobbered the imported ``lm`` module.  The chain is now properly nested.
    """
    backend = self.config_params.get('backend', 'tensorflow')
    if backend == 'pytorch':
        print('PyTorch backend')
        from baseline.pytorch import long_0_tensor_alloc as vec_alloc
        from baseline.pytorch import tensor_shape as vec_shape
        import baseline.pytorch.lm as lm
        self.config_params['preproc']['vec_alloc'] = vec_alloc
        self.config_params['preproc']['vec_shape'] = vec_shape
        self.config_params['preproc']['trim'] = True
    else:
        self.config_params['preproc']['vec_alloc'] = np.zeros
        self.config_params['preproc']['vec_shape'] = np.shape
        if backend == 'dynet':
            print('Dynet backend')
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            dy_params.init()
            self.config_params['preproc']['trim'] = False
            import baseline.dy.lm as lm
        else:
            print('TensorFlow backend')
            self.config_params['preproc']['trim'] = False
            import baseline.tf.lm as lm
    self.task = lm
    # Word normalization: web-style cleanup, lowercasing, or none.
    if self.config_params.get('web-cleanup', False) is True:
        self.config_params['preproc']['word_trans_fn'] = baseline.CONLLSeqReader.web_cleanup
        print('Web-ish data cleanup')
    elif self.config_params.get('lower', False) is True:
        self.config_params['preproc']['word_trans_fn'] = baseline.lowercase
        print('Lower')
    else:
        self.config_params['preproc']['word_trans_fn'] = None
ax.set_xticklabels([u'begin'] + list(input_seq) + [u'end']) ax.set_yticklabels(list(output_seq) + [u'end']) # set title input_word = u' '.join(input_seq) output_word = u' '.join(output_seq) ax.set_title(u'attention-based alignment:\n{}->\n{}'.format( input_word, output_word)) plt.savefig(filename) plt.close() if __name__ == '__main__': arguments = docopt(__doc__) dnparams = dn.DynetParams() if arguments['--seed']: dnparams.set_random_seed(int(arguments['--seed'])) dnparams.init() max_prediction_len = int( arguments['--max-pred']) if arguments['--max-pred'] else None plot_param = arguments['--plot'] beam_param = int(arguments['--beam-size']) results_file_path_param = arguments['RESULTS_PATH'] main(arguments['TRAIN_INPUTS_PATH'], arguments['TRAIN_OUTPUTS_PATH'], arguments['DEV_INPUTS_PATH'], arguments['DEV_OUTPUTS_PATH'], arguments['TEST_INPUTS_PATH'], arguments['TEST_OUTPUTS_PATH'], arguments['RESULTS_PATH'], arguments['VOCAB_INPUT_PATH'], arguments['VOCAB_OUTPUT_PATH'], int(arguments['--input-dim']),
def main():
    """Train a CNN sentence classifier (Kim 2014) with DyNet.

    Parses CLI options, loads word2vec embeddings, builds the vocabulary and
    datasets, then trains with per-epoch validation and checkpointing.

    Fixes relative to the original:
    - the ``--v_strategy`` help string was a broken multi-line literal;
    - the ``--train_y_path`` help wrongly showed ``train_x.txt`` as default.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional Neural Networks for Sentence Classification in DyNet')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument('--train_x_path', type=str, default='./data/train_x.txt',
                        help='File path of train x data [default: `./data/train_x.txt`]')
    parser.add_argument('--train_y_path', type=str, default='./data/train_y.txt',
                        help='File path of train y data [default: `./data/train_y.txt`]')
    parser.add_argument('--valid_x_path', type=str, default='./data/valid_x.txt',
                        help='File path of valid x data [default: `./data/valid_x.txt`]')
    parser.add_argument('--valid_y_path', type=str, default='./data/valid_y.txt',
                        help='File path of valid y data [default: `./data/valid_y.txt`]')
    parser.add_argument('--n_epochs', type=int, default=10,
                        help='Number of epochs [default: 10]')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='Mini batch size [default: 64]')
    parser.add_argument('--win_sizes', type=int, nargs='*', default=[3, 4, 5],
                        help='Window sizes of filters [default: [3, 4, 5]]')
    parser.add_argument('--num_fil', type=int, default=100,
                        help='Number of filters in each window size [default: 100]')
    parser.add_argument('--s', type=float, default=3.0,
                        help='L2 norm constraint on w [default: 3.0]')
    parser.add_argument('--dropout_prob', type=float, default=0.5,
                        help='Dropout probability [default: 0.5]')
    parser.add_argument('--v_strategy', type=str, default='static',
                        help='Embedding strategy. rand: Random initialization. '
                             'static: Load pretrained embeddings and do not update during the training. '
                             'non-static: Load pretrained embeddings and update during the training. '
                             '[default: static]')
    parser.add_argument('--alloc_mem', type=int, default=4096,
                        help='Amount of memory to allocate [mb] [default: 4096]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    N_EPOCHS = args.n_epochs
    WIN_SIZES = args.win_sizes
    BATCH_SIZE = args.batch_size
    EMB_DIM = 300
    OUT_DIM = 1
    L2_NORM_LIM = args.s
    NUM_FIL = args.num_fil
    DROPOUT_PROB = args.dropout_prob
    V_STRATEGY = args.v_strategy
    ALLOC_MEM = args.alloc_mem

    # One input channel, except 'multichannel' which stacks two embeddings.
    if V_STRATEGY in ['rand', 'static', 'non-static']:
        NUM_CHA = 1
    else:
        NUM_CHA = 2

    # FILE paths
    W2V_PATH = './GoogleNews-vectors-negative300.bin'
    TRAIN_X_PATH = args.train_x_path
    TRAIN_Y_PATH = args.train_y_path
    VALID_X_PATH = args.valid_x_path
    VALID_Y_PATH = args.valid_y_path

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load pretrained embeddings
    pretrained_model = gensim.models.KeyedVectors.load_word2vec_format(
        W2V_PATH, binary=True)
    vocab = pretrained_model.wv.vocab.keys()
    w2v = pretrained_model.wv

    # Build dataset
    w2c = build_w2c(TRAIN_X_PATH, vocab=vocab)
    w2i, i2w = build_w2i(TRAIN_X_PATH, w2c, unk='unk')
    train_x, train_y = build_dataset(TRAIN_X_PATH, TRAIN_Y_PATH, w2i, unk='unk')
    valid_x, valid_y = build_dataset(VALID_X_PATH, VALID_Y_PATH, w2i, unk='unk')

    train_x, train_y = sort_data_by_length(train_x, train_y)
    valid_x, valid_y = sort_data_by_length(valid_x, valid_y)

    VOCAB_SIZE = len(w2i)
    print('VOCAB_SIZE:', VOCAB_SIZE)

    V_init = init_V(w2v, w2i)

    with open(os.path.join(RESULTS_DIR, './w2i.dump'), 'wb') as f_w2i, \
            open(os.path.join(RESULTS_DIR, './i2w.dump'), 'wb') as f_i2w:
        pickle.dump(w2i, f_w2i)
        pickle.dump(i2w, f_i2w)

    # Build model
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    # V1: primary embedding table; pretrained unless strategy is 'rand'.
    V1 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
    if V_STRATEGY in ['static', 'non-static', 'multichannel']:
        V1.init_from_array(V_init)
    if V_STRATEGY in ['static', 'multichannel']:
        V1_UPDATE = False
    else:  # 'rand', 'non-static'
        V1_UPDATE = True
    make_emb_zero(V1, [w2i['<s>'], w2i['</s>']], EMB_DIM)

    # V2: second (trainable) channel, only for 'multichannel'.
    if V_STRATEGY == 'multichannel':
        V2 = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
        V2.init_from_array(V_init)
        V2_UPDATE = True
        make_emb_zero(V2, [w2i['<s>'], w2i['</s>']], EMB_DIM)

    layers = [
        CNNText(model, EMB_DIM, WIN_SIZES, NUM_CHA, NUM_FIL, dy.tanh, DROPOUT_PROB),
        Dense(model, 3 * NUM_FIL, OUT_DIM, dy.logistic)
    ]

    # Train model
    n_batches_train = math.ceil(len(train_x) / BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_x) / BATCH_SIZE)
    start_time = time.time()

    for epoch in range(N_EPOCHS):
        # Train
        loss_all_train = []
        pred_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(train_x[start:end], w2i, max(WIN_SIZES)).T
            t = np.array(train_y[start:end])
            sen_len = x.shape[0]

            if V_STRATEGY in ['rand', 'static', 'non-static']:
                x_embs = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs = dy.transpose(x_embs)
                x_embs = dy.reshape(x_embs, (sen_len, EMB_DIM, 1))
            else:  # multichannel
                x_embs1 = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs2 = dy.concatenate_cols(
                    [dy.lookup_batch(V2, x_t, update=V2_UPDATE) for x_t in x])
                x_embs1 = dy.transpose(x_embs1)
                x_embs2 = dy.transpose(x_embs2)
                x_embs = dy.concatenate([x_embs1, x_embs2], d=2)

            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=False)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_train.append(mb_loss.value())
            pred_all_train.extend(list(binary_pred(y.npvalue().flatten())))

            # Backward prop
            mb_loss.backward()
            trainer.update()

            # L2 norm constraint
            layers[1].scale_W(L2_NORM_LIM)

            # Make padding embs zero
            if V_STRATEGY in ['rand', 'non-static']:
                make_emb_zero(V1, [w2i['<s>'], w2i['</s>']], EMB_DIM)
            elif V_STRATEGY in ['multichannel']:
                make_emb_zero(V2, [w2i['<s>'], w2i['</s>']], EMB_DIM)

        # Valid
        loss_all_valid = []
        pred_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters(layers)

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            x = build_batch(valid_x[start:end], w2i, max(WIN_SIZES)).T
            t = np.array(valid_y[start:end])
            sen_len = x.shape[0]

            if V_STRATEGY in ['rand', 'static', 'non-static']:
                x_embs = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs = dy.transpose(x_embs)
                x_embs = dy.reshape(x_embs, (sen_len, EMB_DIM, 1))
            else:  # multichannel
                x_embs1 = dy.concatenate_cols(
                    [dy.lookup_batch(V1, x_t, update=V1_UPDATE) for x_t in x])
                x_embs2 = dy.concatenate_cols(
                    [dy.lookup_batch(V2, x_t, update=V2_UPDATE) for x_t in x])
                x_embs1 = dy.transpose(x_embs1)
                x_embs2 = dy.transpose(x_embs2)
                x_embs = dy.concatenate([x_embs1, x_embs2], d=2)

            t = dy.inputTensor(t, batched=True)
            y = forwards(layers, x_embs, test=True)

            mb_loss = dy.mean_batches(dy.binary_log_loss(y, t))

            # Forward prop
            loss_all_valid.append(mb_loss.value())
            pred_all_valid.extend(list(binary_pred(y.npvalue().flatten())))

        print(
            'EPOCH: %d, Train Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Valid Loss:: %.3f (F1:: %.3f, Acc:: %.3f), Time:: %.3f[s]'
            % (
                epoch + 1,
                np.mean(loss_all_train),
                f1_score(train_y, pred_all_train),
                accuracy_score(train_y, pred_all_train),
                np.mean(loss_all_valid),
                f1_score(valid_y, pred_all_valid),
                accuracy_score(valid_y, pred_all_valid),
                time.time() - start_time,
            ))

        # Save model
        if V_STRATEGY in ['rand', 'static', 'non-static']:
            dy.save(os.path.join(RESULTS_DIR, './model_e' + str(epoch + 1)),
                    [V1] + layers)
        else:
            dy.save(os.path.join(RESULTS_DIR, './model_e' + str(epoch + 1)),
                    [V1, V2] + layers)
import sys # No support for python2 if sys.version_info[0] == 2: raise RuntimeError("XNMT does not support python2 any longer.") package_dir = os.path.dirname(os.path.abspath(__file__)) if package_dir not in sys.path: sys.path.append(package_dir) import logging logger = logging.getLogger('xnmt') yaml_logger = logging.getLogger('yaml') import _dynet dyparams = _dynet.DynetParams() dyparams.from_args() # all Serializable objects must be imported here in order to be parsable # using the !Classname YAML syntax import xnmt.attender import xnmt.batcher import xnmt.conv import xnmt.decoder import xnmt.embedder import xnmt.eval_task import xnmt.evaluator import xnmt.exp_global import xnmt.experiment import xnmt.ff
import math
import json

# NOTE(review): pulls parsed CLI options from the entry-point script; this
# only works when this module is imported *after* `args` exists in __main__.
from __main__ import args

# Choose the CPU or GPU build of DyNet before any other DyNet use.
if args.gpus == 0:
    import _dynet as dy
    dyparams = dy.DynetParams()
else:
    import _gdynet as dy
    dyparams = dy.DynetParams()
    dyparams.set_requested_gpus(args.gpus)
dyparams.set_mem(args.memory)
dyparams.set_random_seed(args.seed)
dyparams.init()

from lstm_common import *
from sklearn.base import BaseEstimator

# Network hyper-parameters.
NUM_LAYERS = 2
LSTM_HIDDEN_DIM = 60
LEMMA_DIM = 50
POS_DIM = 4
DEP_DIM = 5
DIR_DIM = 1

# presumably the placeholder "no dependency path" value — confirm against usage
EMPTY_PATH = ((0, 0, 0, 0), )
LOSS_EPSILON = 0.0  # 0.01
MINIBATCH_SIZE = 100
def main():
    """Train the Selective Encoding summarization model (Zhou et al.) in DyNet.

    Parses CLI options, builds the Gigaword dataset, then trains with
    per-epoch validation, checkpointing the model and vocab each epoch.

    Fixes relative to the original:
    - ``--gpu`` help claimed the default was -1, but the actual default is '0';
    - stray ``)`` removed from the ``--n_valid`` help text.
    """
    parser = argparse.ArgumentParser(
        description='Selective Encoding for Abstractive Sentence Summarization in DyNet')
    parser.add_argument('--gpu', type=str, default='0',
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument('--n_epochs', type=int, default=3,
                        help='Number of epochs [default: 3]')
    parser.add_argument('--n_train', type=int, default=3803957,
                        help='Number of training data (up to 3803957 in gigaword) [default: 3803957]')
    parser.add_argument('--n_valid', type=int, default=189651,
                        help='Number of validation data (up to 189651 in gigaword) [default: 189651]')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Mini batch size [default: 32]')
    parser.add_argument('--vocab_size', type=int, default=124404,
                        help='Vocabulary size [default: 124404]')
    parser.add_argument('--emb_dim', type=int, default=256,
                        help='Embedding size [default: 256]')
    parser.add_argument('--hid_dim', type=int, default=256,
                        help='Hidden state size [default: 256]')
    parser.add_argument('--maxout_dim', type=int, default=2,
                        help='Maxout size [default: 2]')
    parser.add_argument('--alloc_mem', type=int, default=10000,
                        help='Amount of memory to allocate [mb] [default: 10000]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_EPOCHS = args.n_epochs
    N_TRAIN = args.n_train
    N_VALID = args.n_valid
    BATCH_SIZE = args.batch_size
    VOCAB_SIZE = args.vocab_size
    EMB_DIM = args.emb_dim
    HID_DIM = args.hid_dim
    MAXOUT_DIM = args.maxout_dim
    ALLOC_MEM = args.alloc_mem

    # File paths
    TRAIN_X_FILE = './data/train.article.txt'
    TRAIN_Y_FILE = './data/train.title.txt'
    VALID_X_FILE = './data/valid.article.filter.txt'
    VALID_Y_FILE = './data/valid.title.filter.txt'

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Build dataset
    dataset = Dataset(
        TRAIN_X_FILE,
        TRAIN_Y_FILE,
        VALID_X_FILE,
        VALID_Y_FILE,
        vocab_size=VOCAB_SIZE,
        batch_size=BATCH_SIZE,
        n_train=N_TRAIN,
        n_valid=N_VALID
    )
    VOCAB_SIZE = len(dataset.w2i)
    print('VOCAB_SIZE', VOCAB_SIZE)

    # Build model
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    V = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
    encoder = SelectiveBiGRU(model, EMB_DIM, HID_DIM)
    decoder = AttentionalGRU(model, EMB_DIM, HID_DIM, MAXOUT_DIM, VOCAB_SIZE)

    # Train model
    start_time = time.time()
    for epoch in range(N_EPOCHS):
        # Train
        loss_all_train = []
        dataset.reset_train_iter()
        for train_x_mb, train_y_mb in tqdm(dataset.train_iter):
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters([encoder, decoder])

            losses = []
            for x, t in zip(train_x_mb, train_y_mb):
                # Teacher forcing: feed t[:-1], score against t[1:].
                t_in, t_out = t[:-1], t[1:]

                # Encoder
                x_embs = [dy.lookup(V, x_t) for x_t in x]
                hp, hb_1 = encoder(x_embs)

                # Decoder
                decoder.set_initial_states(hp, hb_1)
                t_embs = [dy.lookup(V, t_t) for t_t in t_in]
                y = decoder(t_embs)

                # Loss
                loss = dy.esum(
                    [dy.pickneglogsoftmax(y_t, t_t) for y_t, t_t in zip(y, t_out)]
                )
                losses.append(loss)

            mb_loss = dy.average(losses)

            # Forward prop
            loss_all_train.append(mb_loss.value())

            # Backward prop
            mb_loss.backward()
            trainer.update()

        # Valid
        loss_all_valid = []
        dataset.reset_valid_iter()
        for valid_x_mb, valid_y_mb in dataset.valid_iter:
            # Create a new computation graph
            dy.renew_cg()
            associate_parameters([encoder, decoder])

            losses = []
            for x, t in zip(valid_x_mb, valid_y_mb):
                t_in, t_out = t[:-1], t[1:]

                # Encoder
                x_embs = [dy.lookup(V, x_t) for x_t in x]
                hp, hb_1 = encoder(x_embs)

                # Decoder
                decoder.set_initial_states(hp, hb_1)
                t_embs = [dy.lookup(V, t_t) for t_t in t_in]
                y = decoder(t_embs)

                # Loss
                loss = dy.esum(
                    [dy.pickneglogsoftmax(y_t, t_t) for y_t, t_t in zip(y, t_out)]
                )
                losses.append(loss)

            mb_loss = dy.average(losses)

            # Forward prop
            loss_all_valid.append(mb_loss.value())

        print('EPOCH: %d, Train Loss: %.3f, Valid Loss: %.3f, Time: %.3f[s]' % (
            epoch+1,
            np.mean(loss_all_train),
            np.mean(loss_all_valid),
            time.time()-start_time
        ))

        # Save model (checkpoint every epoch)
        dy.save('./model_e'+str(epoch+1), [V, encoder, decoder])
        with open('./w2i.dump', 'wb') as f_w2i, open('./i2w.dump', 'wb') as f_i2w:
            pickle.dump(dataset.w2i, f_w2i)
            pickle.dump(dataset.i2w, f_i2w)
def main():
    """Train the ABS summarization model (Rush et al. 2015) in DyNet.

    Parses CLI options, builds the Gigaword dataset and vocabulary, then
    trains with per-epoch validation, checkpointing model and vocab.

    Fixes relative to the original:
    - the ``--encoder_type`` help string was a broken multi-line literal;
    - removed the unused local ``OUT_DIM``.
    """
    parser = argparse.ArgumentParser(
        description='A Neural Attention Model for Abstractive Sentence Summarization in DyNet')
    parser.add_argument('--gpu', type=str, default='0',
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument('--n_epochs', type=int, default=10,
                        help='Number of epochs [default: 10]')
    parser.add_argument('--n_train', type=int, default=3803957,
                        help='Number of training data (up to 3803957 in gigaword) [default: 3803957]')
    parser.add_argument('--n_valid', type=int, default=189651,
                        help='Number of validation data (up to 189651 in gigaword) [default: 189651]')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Mini batch size [default: 32]')
    parser.add_argument('--vocab_size', type=int, default=60000,
                        help='Vocabulary size [default: 60000]')
    parser.add_argument('--emb_dim', type=int, default=256,
                        help='Embedding size [default: 256]')
    parser.add_argument('--hid_dim', type=int, default=256,
                        help='Hidden state size [default: 256]')
    parser.add_argument('--encoder_type', type=str, default='attention',
                        help='Encoder type. bow: Bag-of-words encoder. '
                             'attention: Attention-based encoder [default: attention]')
    parser.add_argument('--c', type=int, default=5,
                        help='Window size in neural language model [default: 5]')
    parser.add_argument('--q', type=int, default=2,
                        help='Window size in attention-based encoder [default: 2]')
    parser.add_argument('--alloc_mem', type=int, default=4096,
                        help='Amount of memory to allocate [mb] [default: 4096]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_EPOCHS = args.n_epochs
    N_TRAIN = args.n_train
    N_VALID = args.n_valid
    BATCH_SIZE = args.batch_size
    VOCAB_SIZE = args.vocab_size
    EMB_DIM = args.emb_dim
    HID_DIM = args.hid_dim
    ENCODER_TYPE = args.encoder_type
    C = args.c
    Q = args.q
    ALLOC_MEM = args.alloc_mem

    # File paths
    TRAIN_X_FILE = './data/train.article.txt'
    TRAIN_Y_FILE = './data/train.title.txt'
    VALID_X_FILE = './data/valid.article.filter.txt'
    VALID_Y_FILE = './data/valid.title.filter.txt'

    # DyNet setting
    # NOTE(review): uses RANDOM_STATE where sibling scripts use RANDOM_SEED —
    # both are module-level names defined elsewhere; confirm which exists here.
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_STATE)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Build dataset
    w2c = build_word2count(TRAIN_X_FILE, n_data=N_TRAIN)
    w2c = build_word2count(TRAIN_Y_FILE, w2c=w2c, n_data=N_TRAIN)

    train_X, w2i, i2w = build_dataset(TRAIN_X_FILE, w2c=w2c, padid=False, eos=True,
                                      unksym='<unk>', target=False, n_data=N_TRAIN,
                                      vocab_size=VOCAB_SIZE)
    train_y, _, _ = build_dataset(TRAIN_Y_FILE, w2i=w2i, target=True, n_data=N_TRAIN)

    valid_X, _, _ = build_dataset(VALID_X_FILE, w2i=w2i, target=False, n_data=N_VALID)
    valid_y, _, _ = build_dataset(VALID_Y_FILE, w2i=w2i, target=True, n_data=N_VALID)

    VOCAB_SIZE = len(w2i)
    print('VOCAB_SIZE:', VOCAB_SIZE)

    # Build model
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    rush_abs = ABS(model, EMB_DIM, HID_DIM, VOCAB_SIZE, Q, C,
                   encoder_type=ENCODER_TYPE)

    # Padding: prepend C-1 start symbols so the C-gram context is defined
    # from the first real token.
    train_y = [[w2i['<s>']]*(C-1)+instance_y for instance_y in train_y]
    valid_y = [[w2i['<s>']]*(C-1)+instance_y for instance_y in valid_y]

    n_batches_train = math.ceil(len(train_X)/BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_X)/BATCH_SIZE)
    start_time = time.time()

    for epoch in range(N_EPOCHS):
        # Train
        train_X, train_y = shuffle(train_X, train_y)
        loss_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            rush_abs.associate_parameters()

            # Create a mini batch
            start = i*BATCH_SIZE
            end = start + BATCH_SIZE
            train_X_mb = train_X[start:end]
            train_y_mb = train_y[start:end]

            losses = []
            for x, t in zip(train_X_mb, train_y_mb):
                # Feed all but the last token; score from position C onward
                # (the first C-1 positions are the <s> padding).
                t_in, t_out = t[:-1], t[C:]

                y = rush_abs(x, t_in)
                loss = dy.esum([dy.pickneglogsoftmax(y_t, t_t)
                                for y_t, t_t in zip(y, t_out)])
                losses.append(loss)

            mb_loss = dy.average(losses)

            # Forward prop
            loss_all_train.append(mb_loss.value())

            # Backward prop
            mb_loss.backward()
            trainer.update()

        # Valid
        loss_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            rush_abs.associate_parameters()

            # Create a mini batch
            start = i*BATCH_SIZE
            end = start + BATCH_SIZE
            valid_X_mb = valid_X[start:end]
            valid_y_mb = valid_y[start:end]

            losses = []
            for x, t in zip(valid_X_mb, valid_y_mb):
                t_in, t_out = t[:-1], t[C:]

                y = rush_abs(x, t_in)
                loss = dy.esum([dy.pickneglogsoftmax(y_t, t_t)
                                for y_t, t_t in zip(y, t_out)])
                losses.append(loss)

            mb_loss = dy.average(losses)

            # Forward prop
            loss_all_valid.append(mb_loss.value())

        print('EPOCH: %d, Train Loss: %.3f, Valid Loss: %.3f' % (
            epoch+1,
            np.mean(loss_all_train),
            np.mean(loss_all_valid)
        ))

        # Save model (checkpoint every epoch)
        dy.save('./model_e'+str(epoch+1), [rush_abs])
        with open('./w2i.dump', 'wb') as f_w2i, open('./i2w.dump', 'wb') as f_i2w:
            pickle.dump(w2i, f_w2i)
            pickle.dump(i2w, f_i2w)
def main():
    """Decode test articles with a trained DRGD summarizer using beam search.

    Loads the pickled word/index maps and the saved (V, encoder, decoder)
    triple, runs length-normalized beam search over each test sentence, and
    writes one predicted title per line to the output file.
    """
    parser = argparse.ArgumentParser(
        description=
        'Deep Recurrent Generative Decoder for Abstractive Text Summarization in DyNet'
    )
    parser.add_argument('--gpu',
                        type=str,
                        default='0',
                        help='GPU ID to use. For cpu, set -1 [default: -]')
    parser.add_argument('--n_test',
                        type=int,
                        default=189651,
                        help='Number of test examples [default: 189651]')
    parser.add_argument('--beam_size',
                        type=int,
                        default=5,
                        help='Beam size [default: 5]')
    parser.add_argument('--max_len',
                        type=int,
                        default=100,
                        help='Maximum length of decoding [default: 100]')
    parser.add_argument('--model_file',
                        type=str,
                        default='./model_e1',
                        help='Trained model file path [default: ./model_e1]')
    parser.add_argument(
        '--input_file',
        type=str,
        default='./data/valid.article.filter.txt',
        help='Test file path [default: ./data/valid.article.filter.txt]')
    parser.add_argument('--output_file',
                        type=str,
                        default='./pred_y.txt',
                        help='Output file path [default: ./pred_y.txt]')
    parser.add_argument('--w2i_file',
                        type=str,
                        default='./w2i.dump',
                        help='Word2Index file path [default: ./w2i.dump]')
    parser.add_argument('--i2w_file',
                        type=str,
                        default='./i2w.dump',
                        help='Index2Word file path [default: ./i2w.dump]')
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=1024,
        help='Amount of memory to allocate [mb] [default: 1024]')
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_TEST = args.n_test
    K = args.beam_size          # beam width
    MAX_LEN = args.max_len      # hard cap on decoded length
    ALLOC_MEM = args.alloc_mem

    # File paths
    MODEL_FILE = args.model_file
    INPUT_FILE = args.input_file
    OUTPUT_FILE = args.output_file
    W2I_FILE = args.w2i_file
    I2W_FILE = args.i2w_file

    # DyNet setting
    # NOTE(review): sibling scripts in this file seed with RANDOM_STATE;
    # this one uses RANDOM_SEED — confirm the constant is defined at module level.
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_SEED)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load trained model ==============================================================================================
    with open(W2I_FILE, 'rb') as f_w2i, open(I2W_FILE, 'rb') as f_i2w:
        w2i = pickle.load(f_w2i)
        i2w = pickle.load(f_i2w)

    test_X, _, _ = build_dataset(INPUT_FILE,
                                 w2i=w2i,
                                 n_data=N_TEST,
                                 target=False)

    model = dy.Model()
    V, encoder, decoder = dy.load(MODEL_FILE, model)

    # Decode
    pred_y = []
    for x in tqdm(test_X):
        dy.renew_cg()
        associate_parameters([encoder, decoder])

        # Initial states
        x_embs = [dy.lookup(V, x_t) for x_t in x]
        hp, hb_1 = encoder(x_embs)
        decoder.set_initial_states(hp, hb_1)
        s_0, c_0 = decoder.s_0, decoder.c_0

        # candidates: each entry is
        # [accumulated log prob, last token id, decoder state s, context c, decoded ids]
        candidates = [[0, w2i['<s>'], s_0, c_0, []]]

        t = 0
        while t < MAX_LEN:
            t += 1
            tmp_candidates = []
            end_flag = True
            for score_tm1, y_tm1, s_tm1, c_tm1, y_02tm1 in candidates:
                if y_tm1 == w2i['</s>']:
                    # Finished hypothesis: carry it forward unchanged.
                    tmp_candidates.append(
                        [score_tm1, y_tm1, s_tm1, c_tm1, y_02tm1])
                else:
                    end_flag = False
                    y_tm1_emb = dy.lookup(V, y_tm1)
                    s_t, c_t, _q_t = decoder(y_tm1_emb,
                                             tm1s=[s_tm1, c_tm1],
                                             test=True)
                    _q_t = np.log(_q_t.npvalue())  # Calculate log probs
                    q_t, y_t = np.sort(_q_t)[::-1][:K], np.argsort(
                        _q_t
                    )[::-1][:K]  # Pick K highest log probs and their ids
                    score_t = score_tm1 + q_t  # Accumulate log probs
                    tmp_candidates.extend(
                        [[score_tk, y_tk, s_t, c_t, y_02tm1 + [y_tk]]
                         for score_tk, y_tk in zip(score_t, y_t)])
            if end_flag:
                # All beam entries ended with </s>; stop early.
                break
            candidates = sorted(
                tmp_candidates, key=lambda x: -x[0] / len(x[-1])
            )[:K]  # Sort in normalized log probs and pick K highest candidates

        # Pick the candidate with the highest score
        pred = candidates[0][-1]
        if w2i['</s>'] in pred:
            pred.remove(w2i['</s>'])
        pred_y.append(pred)

    pred_y_txt = ''
    for pred in pred_y:
        pred_y_txt += ' '.join([i2w[com] for com in pred]) + '\n'

    with open(OUTPUT_FILE, 'w') as f:
        f.write(pred_y_txt)
def main():
    """Train the Deep Recurrent Generative Decoder (DRGD) summarizer.

    Parses CLI options, builds the vocabulary and datasets from the Gigaword
    article/title files, trains a BiGRU encoder + recurrent generative decoder
    (cross-entropy + KL loss) for the requested number of epochs, and saves
    the model plus the word/index maps after every epoch.
    """
    parser = argparse.ArgumentParser(
        description=
        'Deep Recurrent Generative Decoder for Abstractive Text Summarization in DyNet'
    )
    # Help text fixed: the actual default GPU is '0', not -1.
    parser.add_argument('--gpu', type=str, default='0',
                        help='GPU ID to use. For cpu, set -1 [default: 0]')
    parser.add_argument('--n_epochs', type=int, default=3,
                        help='Number of epochs [default: 3]')
    parser.add_argument(
        '--n_train', type=int, default=3803957,
        help='Number of training examples (up to 3803957 in gigaword) [default: 3803957]')
    parser.add_argument(
        '--n_valid', type=int, default=189651,
        help='Number of validation examples (up to 189651 in gigaword) [default: 189651]')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Mini batch size [default: 32]')
    # Previously hard-coded to 60000; now configurable (backward-compatible default).
    parser.add_argument('--vocab_size', type=int, default=60000,
                        help='Vocabulary size [default: 60000]')
    parser.add_argument('--emb_dim', type=int, default=256,
                        help='Embedding size [default: 256]')
    parser.add_argument('--hid_dim', type=int, default=256,
                        help='Hidden state size [default: 256]')
    parser.add_argument('--lat_dim', type=int, default=256,
                        help='Latent size [default: 256]')
    parser.add_argument('--alloc_mem', type=int, default=8192,
                        help='Amount of memory to allocate [mb] [default: 8192]')
    args = parser.parse_args()
    print(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_EPOCHS = args.n_epochs
    N_TRAIN = args.n_train
    N_VALID = args.n_valid
    BATCH_SIZE = args.batch_size
    VOCAB_SIZE = args.vocab_size
    EMB_DIM = args.emb_dim
    HID_DIM = args.hid_dim
    LAT_DIM = args.lat_dim
    ALLOC_MEM = args.alloc_mem

    # File paths
    TRAIN_X_FILE = './data/train.article.txt'
    TRAIN_Y_FILE = './data/train.title.txt'
    VALID_X_FILE = './data/valid.article.filter.txt'
    VALID_Y_FILE = './data/valid.title.filter.txt'

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_STATE)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Build dataset ====================================================================================
    w2c = build_word2count(TRAIN_X_FILE, n_data=N_TRAIN)
    w2c = build_word2count(TRAIN_Y_FILE, w2c=w2c, n_data=N_TRAIN)

    train_X, w2i, i2w = build_dataset(TRAIN_X_FILE,
                                      w2c=w2c,
                                      padid=False,
                                      eos=True,
                                      unksym='<unk>',
                                      target=False,
                                      n_data=N_TRAIN,
                                      vocab_size=VOCAB_SIZE)
    train_y, _, _ = build_dataset(TRAIN_Y_FILE, w2i=w2i, target=True, n_data=N_TRAIN)
    valid_X, _, _ = build_dataset(VALID_X_FILE, w2i=w2i, target=False, n_data=N_VALID)
    valid_y, _, _ = build_dataset(VALID_Y_FILE, w2i=w2i, target=True, n_data=N_VALID)

    VOCAB_SIZE = len(w2i)  # actual vocabulary size after building the dataset
    OUT_DIM = VOCAB_SIZE
    # Labeled print for consistency with the other training script in this file.
    print('VOCAB_SIZE:', VOCAB_SIZE)

    # Build model ======================================================================================
    model = dy.Model()
    trainer = dy.AdamTrainer(model)

    V = model.add_lookup_parameters((VOCAB_SIZE, EMB_DIM))
    encoder = BiGRU(model, EMB_DIM, 2 * HID_DIM)
    decoder = RecurrentGenerativeDecoder(model, EMB_DIM, 2 * HID_DIM, LAT_DIM, OUT_DIM)

    def compute_minibatch_loss(X_mb, y_mb):
        """Average per-instance loss (cross-entropy + KL) over one mini batch.

        Shared by the training and validation loops, which previously
        duplicated this code verbatim.
        """
        losses = []
        for x, t in zip(X_mb, y_mb):
            t_in, t_out = t[:-1], t[1:]

            # Encoder
            x_embs = [dy.lookup(V, x_t) for x_t in x]
            he = encoder(x_embs)

            # Decoder
            t_embs = [dy.lookup(V, t_t) for t_t in t_in]
            decoder.set_initial_states(he)
            y, KL = decoder(t_embs)

            loss = dy.esum([
                dy.pickneglogsoftmax(y_t, t_t) + KL_t
                for y_t, t_t, KL_t in zip(y, t_out, KL)
            ])
            losses.append(loss)
        return dy.average(losses)

    # Train model =======================================================================================
    n_batches_train = math.ceil(len(train_X) / BATCH_SIZE)
    n_batches_valid = math.ceil(len(valid_X) / BATCH_SIZE)

    for epoch in range(N_EPOCHS):
        # Train
        train_X, train_y = shuffle(train_X, train_y)
        loss_all_train = []
        for i in tqdm(range(n_batches_train)):
            # Create a new computation graph
            dy.renew_cg()
            encoder.associate_parameters()
            decoder.associate_parameters()

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            mb_loss = compute_minibatch_loss(train_X[start:end], train_y[start:end])

            # Forward prop
            loss_all_train.append(mb_loss.value())

            # Backward prop
            mb_loss.backward()
            trainer.update()

        # Valid (no backward pass / parameter update)
        loss_all_valid = []
        for i in range(n_batches_valid):
            # Create a new computation graph
            dy.renew_cg()
            encoder.associate_parameters()
            decoder.associate_parameters()

            # Create a mini batch
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            mb_loss = compute_minibatch_loss(valid_X[start:end], valid_y[start:end])
            loss_all_valid.append(mb_loss.value())

        print('EPOCH: %d, Train Loss: %.3f, Valid Loss: %.3f' %
              (epoch + 1, np.mean(loss_all_train), np.mean(loss_all_valid)))

        # Save model ======================================================================================
        dy.save('./model_e' + str(epoch + 1), [V, encoder, decoder])
        with open('./w2i.dump', 'wb') as f_w2i, open('./i2w.dump', 'wb') as f_i2w:
            pickle.dump(w2i, f_w2i)
            pickle.dump(i2w, f_i2w)
def main():
    """Predict labels for a test file with a trained CNN sentence classifier.

    Loads a saved single- or multi-channel model, embeds each (zero-padded)
    test sentence with frozen lookups, runs the convolutional layers, and
    writes one binary prediction per line to the output file.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional Neural Networks for Sentence Classification in DyNet')
    option_table = [
        ('--gpu', int, -1, 'GPU ID to use. For cpu, set -1 [default: -1]'),
        ('--model_file', str, './model', 'Model to use for prediction [default: ./model]'),
        ('--input_file', str, './data/valid_x.txt', 'Input file path [default: ./data/valid_x.txt]'),
        ('--output_file', str, './pred_y.txt', 'Output file path [default: ./pred_y.txt]'),
        ('--w2i_file', str, './w2i.dump', 'Word2Index file path [default: ./w2i.dump]'),
        ('--i2w_file', str, './i2w.dump', 'Index2Word file path [default: ./i2w.dump]'),
        ('--alloc_mem', int, 1024, 'Amount of memory to allocate [mb] [default: 1024]'),
    ]
    for flag, flag_type, flag_default, flag_help in option_table:
        parser.add_argument(flag, type=flag_type, default=flag_default, help=flag_help)
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)

    model_path = args.model_file
    input_path = args.input_file
    output_path = args.output_file
    w2i_path = args.w2i_file
    i2w_path = args.i2w_file

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_mem(args.alloc_mem)
    dyparams.init()

    # Load model: 3 saved objects -> single channel (V1 + layers),
    # otherwise multichannel (V1, V2 + layers).
    model = dy.Model()
    loaded = dy.load(model_path, model)
    multichannel = len(loaded) != 3
    if multichannel:
        V1, V2, layers = loaded[0], loaded[1], loaded[2:]
    else:
        V1, layers = loaded[0], loaded[1:]

    emb_dim = V1.shape()[0]
    win_sizes = layers[0].win_sizes

    # Load test data
    with open(w2i_path, 'rb') as f_w2i, open(i2w_path, 'rb') as f_i2w:
        w2i = pickle.load(f_w2i)
        i2w = pickle.load(f_i2w)

    pad = max(win_sizes)
    test_X, _, _ = build_dataset(input_path, w2i=w2i, unksym='unk')
    # Zero-pad both ends so every window size fits at sentence boundaries.
    test_X = [[0] * pad + sent + [0] * pad for sent in test_X]

    # Pred
    predictions = []
    for sent in tqdm(test_X):
        # Create a new computation graph
        dy.renew_cg()
        associate_parameters(layers)
        sen_len = len(sent)

        def embed(lookup_table):
            # Stack frozen embeddings along d=1, then transpose (as in training).
            cols = [dy.lookup(lookup_table, w, update=False) for w in sent]
            return dy.transpose(dy.concatenate(cols, d=1))

        if multichannel:
            x_embs = dy.concatenate([embed(V1), embed(V2)], d=2)
        else:
            x_embs = dy.reshape(embed(V1), (sen_len, emb_dim, 1))

        y = f_props(layers, x_embs, train=False)
        predictions.append(str(int(binary_pred(y.value()))))

    with open(output_path, 'w') as f:
        f.write('\n'.join(predictions))
def main():
    """Decode test articles with a trained ABS summarizer using beam search.

    Loads the pickled word/index maps and the saved ABS model, runs
    length-normalized beam search over each test sentence (feeding the model
    a fixed-size context window of the last C decoded tokens), and writes one
    predicted title per line to the output file.
    """
    parser = argparse.ArgumentParser(
        description=
        'A Neural Attention Model for Abstractive Sentence Summarization in DyNet'
    )
    parser.add_argument('--gpu',
                        type=str,
                        default='0',
                        help='GPU ID to use. For cpu, set -1 [default: `-`]')
    parser.add_argument('--n_test',
                        type=int,
                        default=189651,
                        help='Number of test examples [default: `189651`]')
    parser.add_argument('--beam_size',
                        type=int,
                        default=5,
                        help='Beam size [default: `5`]')
    parser.add_argument('--max_len',
                        type=int,
                        default=100,
                        help='Maximum length of decoding [default: `100`]')
    parser.add_argument('--model_file',
                        type=str,
                        default='./model_e1',
                        help='Trained model file path [default: `./model_e1`]')
    parser.add_argument(
        '--input_file',
        type=str,
        default='./data/valid.article.filter.txt',
        help='Test file path [default: `./data/valid.article.filter.txt`]')
    parser.add_argument('--output_file',
                        type=str,
                        default='./pred_y.txt',
                        help='Output file path [default: `./pred_y.txt`]')
    parser.add_argument('--w2i_file',
                        type=str,
                        default='./w2i.dump',
                        help='Word2Index file path [default: `./w2i.dump`]')
    parser.add_argument('--i2w_file',
                        type=str,
                        default='./i2w.dump',
                        help='Index2Word file path [default: `./i2w.dump`]')
    parser.add_argument(
        '--alloc_mem',
        type=int,
        default=1024,
        help='Amount of memory to allocate [mb] [default: `1024`]')
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    N_TEST = args.n_test
    K = args.beam_size          # beam width
    MAX_LEN = args.max_len      # hard cap on decoded length
    ALLOC_MEM = args.alloc_mem

    # File paths
    MODEL_FILE = args.model_file
    INPUT_FILE = args.input_file
    OUTPUT_FILE = args.output_file
    W2I_FILE = args.w2i_file
    I2W_FILE = args.i2w_file

    # DyNet setting
    dyparams = dy.DynetParams()
    dyparams.set_autobatch(True)
    dyparams.set_random_seed(RANDOM_STATE)
    dyparams.set_mem(ALLOC_MEM)
    dyparams.init()

    # Load trained model ==============================================================================================
    with open(W2I_FILE, 'rb') as f_w2i, open(I2W_FILE, 'rb') as f_i2w:
        w2i = pickle.load(f_w2i)
        i2w = pickle.load(f_i2w)

    test_X, _, _ = build_dataset(INPUT_FILE, w2i=w2i, n_data=N_TEST)

    model = dy.Model()
    rush_abs = dy.load(MODEL_FILE, model)[0]
    # ENCODER_TYPE is read from the saved model but not used below.
    ENCODER_TYPE = rush_abs.encoder_type
    C = rush_abs.c  # size of the decoder's token context window

    # Decode
    pred_y = []
    for x in tqdm(test_X):
        dy.renew_cg()
        rush_abs.associate_parameters()

        # Initial states
        rush_abs.set_initial_states(x)

        # [accum log prob, BOS, t_c, decoded sequence]
        # t_c is the sliding window of the last C token ids, seeded with <s>.
        candidates = [[0, w2i['<s>'], [w2i['<s>']] * C, []]]

        t = 0
        while t < MAX_LEN:
            t += 1
            tmp_candidates = []
            end_flag = True
            for score_tm1, y_tm1, y_c, y_02tm1 in candidates:
                if y_tm1 == w2i['</s>']:
                    # Finished hypothesis: carry it forward unchanged.
                    tmp_candidates.append([score_tm1, y_tm1, y_c, y_02tm1])
                else:
                    end_flag = False
                    _q_t = rush_abs(t=y_c, test=True)
                    _q_t = np.log(_q_t.npvalue())  # Log probs
                    q_t, y_t = np.sort(_q_t)[::-1][:K], np.argsort(
                        _q_t
                    )[::-1][:K]  # Pick K highest log probs and their ids
                    score_t = score_tm1 + q_t  # Accum log probs
                    tmp_candidates.extend(
                        [[score_tk, y_tk, y_c[1:] + [y_tk], y_02tm1 + [y_tk]]
                         for score_tk, y_tk in zip(score_t, y_t)])
            if end_flag:
                # All beam entries ended with </s>; stop early.
                break
            candidates = sorted(
                tmp_candidates, key=lambda x: -x[0] / len(x[-1])
            )[:K]  # Sort in normalized score and pick K highest candidates

        # Pick the highest-scored candidate
        pred_y.append(candidates[0][-1])

    pred_y_txt = ''
    for pred in pred_y:
        pred_y_txt += ' '.join([i2w[com] for com in pred]) + '\n'

    with open(OUTPUT_FILE, 'w') as f:
        f.write(pred_y_txt)