def _create_backend(self, **kwargs):
    backend = Backend(self.config_params.get('backend', 'tf'))
    if 'preproc' not in self.config_params:
        self.config_params['preproc'] = {}
    if backend.name == 'pytorch':
        self.config_params['preproc']['trim'] = True
    elif backend.name == 'dy':
        import _dynet
        dy_params = _dynet.DynetParams()
        dy_params.from_args()
        dy_params.set_requested_gpus(1)
        if 'autobatchsz' in self.config_params['train']:
            dy_params.set_autobatch(True)
        else:
            raise Exception('Tagger currently only supports autobatching. '
                            'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz')
        dy_params.init()
        backend.params = {'pc': _dynet.ParameterCollection(), 'batched': False}
        self.config_params['preproc']['trim'] = True
    else:
        self.config_params['preproc']['trim'] = False

    # FIXME These should be registered instead
    exporter_type = kwargs.get('exporter_type', 'default')
    if exporter_type == 'default':
        from mead.tf.exporters import TaggerTensorFlowExporter
        backend.exporter = TaggerTensorFlowExporter
    elif exporter_type == 'preproc':
        from mead.tf.preproc_exporters import TaggerTensorFlowPreProcExporter
        import mead.tf.preprocessors
        backend.exporter = TaggerTensorFlowPreProcExporter

    backend.load(self.task_name())
    return backend
def _create_backend(self, **kwargs):
    backend = Backend(self.config_params.get('backend', 'tf'))
    if 'preproc' not in self.config_params:
        self.config_params['preproc'] = {}
    # show_examples is a helper defined elsewhere in the module
    self.config_params['preproc']['show_ex'] = show_examples
    if backend.name == 'pytorch':
        self.config_params['preproc']['trim'] = True
    elif backend.name == 'dy':
        import _dynet
        dy_params = _dynet.DynetParams()
        dy_params.from_args()
        dy_params.set_requested_gpus(1)
        if 'autobatchsz' in self.config_params['train']:
            self.config_params['train']['trainer_type'] = 'autobatch'
            dy_params.set_autobatch(True)
            batched = False
        else:
            batched = True
        dy_params.init()
        backend.params = {'pc': _dynet.ParameterCollection(), 'batched': batched}
        self.config_params['preproc']['trim'] = True
    else:
        self.config_params['preproc']['trim'] = True

    backend.load(self.task_name())
    return backend
def __init__(self, h_layers, h_dim, vocab_size, noise_sigma=0.1, trainer="adam", clip_threshold=5.0, add_hidden=False, learning_rate=0.001, activation='rectify'): self.model = dynet.ParameterCollection() self.h_layers = h_layers self.h_dim = h_dim self.vocab_size = vocab_size self.noise_sigma = noise_sigma if self.noise_sigma > 0.05: print('Noise sigma > %.4f. Training might not work.' % noise_sigma) self.layers = [] self.output_layers_dict = {} self.trainer = TRAINER_MAP[trainer](self.model, learning_rate) self.trainer.set_clip_threshold(clip_threshold) self.task_ids = ["F0", "F1", "Ft"] self.add_hidden = add_hidden self.activation_func = activation self.activation = activation2func(activation)
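# TRAINER_MAP (used here and in several snippets below) is not part of these
# excerpts. A plausible reconstruction, mapping the string names onto DyNet
# trainer classes, is sketched here; the real mapping in the source may differ.
import dynet

TRAINER_MAP = {
    "sgd": dynet.SimpleSGDTrainer,
    "momentum": dynet.MomentumSGDTrainer,
    "adam": dynet.AdamTrainer,
    "adagrad": dynet.AdagradTrainer,
}

# Usage mirrors the constructor above, e.g. TRAINER_MAP["adam"](model, 0.001)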
def _create_backend(self, **kwargs):
    backend = Backend(self.config_params.get('backend', 'tf'))
    if backend.name == 'dy':
        import _dynet
        dy_params = _dynet.DynetParams()
        dy_params.from_args()
        dy_params.set_requested_gpus(1)
        if 'autobatchsz' in self.config_params['train']:
            self.config_params['train']['trainer_type'] = 'autobatch'
            dy_params.set_autobatch(True)
            batched = False
        else:
            batched = True
        dy_params.init()
        backend.params = {'pc': _dynet.ParameterCollection(), 'batched': batched}
    elif backend.name == 'tf':
        # FIXME this should be registered as well!
        exporter_type = kwargs.get('exporter_type', 'default')
        if exporter_type == 'default':
            from mead.tf.exporters import ClassifyTensorFlowExporter
            backend.exporter = ClassifyTensorFlowExporter
        elif exporter_type == 'preproc':
            from mead.tf.preproc_exporters import ClassifyTensorFlowPreProcExporter
            import mead.tf.preprocessors
            backend.exporter = ClassifyTensorFlowPreProcExporter

    backend.load(self.task_name())
    return backend
def _create_backend(self, **kwargs):
    backend = Backend(self.config_params.get('backend', 'tf'))
    if 'preproc' not in self.config_params:
        self.config_params['preproc'] = {}
    if backend.name == 'pytorch':
        self.config_params['preproc']['trim'] = True
    elif backend.name == 'dy':
        import _dynet
        dy_params = _dynet.DynetParams()
        dy_params.from_args()
        dy_params.set_requested_gpus(1)
        if 'autobatchsz' in self.config_params['train']:
            dy_params.set_autobatch(True)
        else:
            raise Exception('Tagger currently only supports autobatching. '
                            'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz')
        dy_params.init()
        backend.params = {'pc': _dynet.ParameterCollection(), 'batched': False}
        self.config_params['preproc']['trim'] = True
    else:
        self.config_params['preproc']['trim'] = False

    backend.load(self.task_name())
    return backend
def load(cls, word_embeddings, base_file, embedding_dim=None, hidden_dim=None, classes_dim=None):
    pc = dy.ParameterCollection()
    matrices = dy.load(base_file, pc)
    # matrices = matrices[1:]  # for now, skip "E"
    return cls(word_embeddings, embedding_dim, hidden_dim, classes_dim, pc, matrices)
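# Companion sketch (assumed, not from the source): the matrices consumed by
# dy.load() above would typically have been written with dy.save(), which
# persists a list of parameters under a basename and returns them in the same
# order when loaded into a fresh ParameterCollection.
import dynet as dy

pc = dy.ParameterCollection()
E = pc.add_lookup_parameters((1000, 300))
W = pc.add_parameters((200, 300))
dy.save("base_file", [E, W])            # write the listed objects under "base_file"

pc2 = dy.ParameterCollection()
E2, W2 = dy.load("base_file", pc2)      # returned in the same order they were saved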
def __init__(self, h_layers, h_dim, vocab_size, noise_sigma=0.1, trainer="adam", clip_threshold=5.0, activation='rectify'): self.model = dynet.ParameterCollection() self.h_layers = h_layers self.h_dim = h_dim self.vocab_size = vocab_size self.noise_sigma = noise_sigma self.activation = activation2func(activation) self.layers = [] self.trainer = TRAINER_MAP[trainer](self.model) self.trainer.set_clip_threshold(clip_threshold)
def __init__(self, word_size, context_fre, context_size, vocab, window=2,
             subsample_n=2000, mode='bow', embed_size=200, batch_size=128,
             num_sampled=5, epoch=6):
    self.embed_size = embed_size
    self.mode = mode
    self.window = window
    self.vocab = vocab
    self.word_size = word_size
    self.subsample_n = subsample_n
    self.context_size = context_size
    self.num_sampled = num_sampled
    self.epoch = epoch
    self.context_fre = context_fre
    self.batch_size = batch_size
    self.pc = dy.ParameterCollection()
    self.optimizer = dy.AdamTrainer(self.pc)
    self.word_embeddings = self.pc.add_lookup_parameters(
        (self.word_size, self.embed_size), name="word-embeddings")
    self.context_embeddings = self.pc.add_lookup_parameters(
        (self.context_size, self.embed_size), name="context-embeddings")
    dy.renew_cg()
    print([(param.name(), param.shape()) for param in
           self.pc.lookup_parameters_list() + self.pc.parameters_list()])
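# Hedged illustration (not from the source): the kind of per-pair negative-sampling
# loss a skip-gram/CBOW model with these two lookup tables typically computes.
# word_id, context_id and negative_ids are placeholder names.
import dynet as dy

def pair_loss(word_embeddings, context_embeddings, word_id, context_id, negative_ids):
    w = word_embeddings[word_id]
    pos = dy.log(dy.logistic(dy.dot_product(w, context_embeddings[context_id])))
    negs = [dy.log(dy.logistic(-dy.dot_product(w, context_embeddings[n])))
            for n in negative_ids]
    return -(pos + dy.esum(negs))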
def create_network_params(nwords, ntags, external_E=None):
    # create a parameter collection and add the parameters.
    print("adding parameters")
    m = dy.ParameterCollection()
    print("nwords: {}".format(nwords))
    E = m.add_lookup_parameters((nwords, EMB), name='E')
    if external_E is not None and sum(external_E.shape) > 0:
        assert external_E.shape[1] == EMB
        external_rows = external_E.shape[0]
        for r in range(external_rows):
            E.init_row(r, external_E[r, :])
    b = m.add_parameters(HIDDEN, name='b')
    U = m.add_parameters((ntags, HIDDEN), name='U')
    W = m.add_parameters((HIDDEN, INPUT), name='W')
    bp = m.add_parameters(ntags, name='bp')
    dy.renew_cg()
    return m, E, b, U, W, bp
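# Minimal usage sketch, assuming EMB, HIDDEN and INPUT describe a window-based
# MLP tagger (INPUT = window size * EMB); score_window() is illustrative and not
# part of the original file.
import dynet as dy

def score_window(E, b, U, W, bp, word_ids):
    dy.renew_cg()
    x = dy.concatenate([E[i] for i in word_ids])   # window of embeddings, length INPUT
    h = dy.tanh(dy.parameter(W) * x + dy.parameter(b))
    return dy.parameter(U) * h + dy.parameter(bp)  # unnormalised tag scores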
def __init__(self, word_embeddings, embedding_dim=None, hidden_dim=None,
             classes_dim=None, pc=None, trained_matrices=None):
    embedding_dim = embedding_dim or 300
    hidden_dim = hidden_dim or 200
    classes_dim = classes_dim or 3
    self.embeddings = word_embeddings
    self.embedding_dim, self.hidden_dim, self.classes_dim = embedding_dim, hidden_dim, classes_dim
    if pc and trained_matrices:
        print "loading pretrained inputs"
        self.pc = pc
        first_attend_index = 2 + DIMR_DEPTH * 2  # the *2 accounts for the w and b in each layer
        first_compare_index = first_attend_index + ATTEND_DEPTH * 2
        first_agg_index = first_compare_index + COMPARE_DEPTH * 2
        print "dimension reducer", range(1, first_attend_index)
        print "attend", range(first_attend_index, first_compare_index)
        print "compare", range(first_compare_index, first_agg_index)
        print "aggregate", range(first_agg_index, len(trained_matrices))
        self.dimension_reducer = self._create_dimension_reducer(
            trained_matrices[1:first_attend_index])
        self.attend = self._create_attend(
            trained_matrices[first_attend_index:first_compare_index])
        self.compare = self._create_compare(
            trained_matrices[first_compare_index:first_agg_index])
        self.aggregate = self._create_aggregate(
            trained_matrices[first_agg_index:])
        self.params = {"E": trained_matrices[0]}
    else:
        self.pc = dy.ParameterCollection()
        self.dimension_reducer = self._create_dimension_reducer()
        self.attend = self._create_attend()
        self.compare = self._create_compare()
        self.aggregate = self._create_aggregate()
        self.params = {"E": word_embeddings.as_dynet_lookup(self.pc)}
def __init__(self, in_dim, h_dim, c_in_dim, h_layers, embeds_file=None,
             activation=dynet.tanh, noise_sigma=0.1, word2id=None,
             add_hidden=False, trainer="adam", clip_threshold=5.0,
             learning_rate=0.001, adversarial_domains=None):
    self.w2i = {} if word2id is None else word2id  # word to index mapping
    self.c2i = {}  # char to index mapping
    self.tag2idx = {}  # tag to tag_id mapping
    self.model = dynet.ParameterCollection()  # init model
    # init trainer
    train_algo = TRAINER_MAP[trainer]
    self.trainer = train_algo(self.model, learning_rate)
    if clip_threshold:
        self.trainer.set_clip_threshold(clip_threshold)
    self.in_dim = in_dim
    self.h_dim = h_dim
    self.c_in_dim = c_in_dim
    self.activation = activation
    self.noise_sigma = noise_sigma
    self.h_layers = h_layers
    self.predictors = {
        "inner": [],
        "output_layers_dict": {},
        "task_expected_at": {}
    }  # the inner layers and predictors
    self.wembeds = None  # lookup: embeddings for words
    self.cembeds = None  # lookup: embeddings for characters
    self.embeds_file = embeds_file
    self.char_rnn = None  # RNN for character input
    self.task_ids = ["F0", "F1", "Ft"]
    self.add_hidden = add_hidden
    self.adversarial_domains = adversarial_domains
def create_computation_graph(self, num_lemmas, num_pos, num_dep, num_directions,
                             num_relations, wv=None, lemma_dimension=50):
    model = dy.ParameterCollection()
    if self.opt['use_path']:
        input_dim = self.opt['PATH_LSTM_HIDDEN_DIM']
    else:
        input_dim = 0

    # dy.LSTMBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
    builder = dy.LSTMBuilder(
        self.opt['NUM_LAYERS'],
        lemma_dimension + self.opt['POS_DIM'] + self.opt['DEP_DIM'] + self.opt['DIR_DIM'],
        input_dim, model)

    model_parameters = {}
    for k, v in feat_dims.items():
        model_parameters[k] = model.add_lookup_parameters(v)

    # Concatenate x and y
    if self.opt['use_xy_embeddings']:
        input_dim += 2 * lemma_dimension
    if self.opt['use_features']:
        for name, dim in feat_dims.items():
            if 'diff' in name and not self.opt['use_freq_features']:
                continue
            input_dim += dim[1]
    if self.opt['use_height_ebd']:
        model_parameters['height_lookup'] = model.add_lookup_parameters(
            (10, self.opt['height_ebd_dim']))
        input_dim += self.opt['height_ebd_dim']

    model_parameters['lemma_lookup'] = model.add_lookup_parameters(
        (num_lemmas, lemma_dimension))

    builder_hist = dy.LSTMBuilder(2, input_dim, self.opt['HIST_LSTM_HIDDEN_DIM'], model)

    # Pre-trained word embeddings
    if wv is not None:
        model_parameters['lemma_lookup'].init_from_array(wv)

    model_parameters['pos_lookup'] = model.add_lookup_parameters(
        (num_pos, self.opt['POS_DIM']))
    model_parameters['dep_lookup'] = model.add_lookup_parameters(
        (num_dep, self.opt['DEP_DIM']))
    model_parameters['dir_lookup'] = model.add_lookup_parameters(
        (num_directions, self.opt['DIR_DIM']))

    if not self.opt['one_layer']:
        if self.opt['use_history']:
            model_parameters['W2_rl'] = model.add_parameters(
                (input_dim, self.opt['MLP_HIDDEN_DIM']))
            model_parameters['b2_rl'] = model.add_parameters((input_dim, 1))
            model_parameters['W1_rl'] = model.add_parameters(
                (self.opt['MLP_HIDDEN_DIM'], self.opt['HIST_LSTM_HIDDEN_DIM']))
            model_parameters['b1_rl'] = model.add_parameters(
                (self.opt['MLP_HIDDEN_DIM'], 1))
        else:
            model_parameters['W2_rl'] = model.add_parameters(
                (input_dim, self.opt['MLP_HIDDEN_DIM']))
            model_parameters['b2_rl'] = model.add_parameters(
                (1, self.opt['MLP_HIDDEN_DIM']))
            model_parameters['W1_rl'] = model.add_parameters(
                (self.opt['MLP_HIDDEN_DIM'], 1))
            model_parameters['b1_rl'] = model.add_parameters((1, 1))
    else:
        if self.opt['use_history']:
            model_parameters['W1_rl'] = model.add_parameters(
                (input_dim, self.opt['HIST_LSTM_HIDDEN_DIM']))
            model_parameters['b1_rl'] = model.add_parameters((input_dim, 1))
        else:
            model_parameters['W1_rl'] = model.add_parameters((input_dim, 1))
            model_parameters['b1_rl'] = model.add_parameters((1, 1))

    if self.opt['load_model_file'] is not None:
        print 'model loaded from', self.opt['load_model_file']
        model.populate('{}'.format(self.opt['load_model_file']))
        if self.opt['load_opt']:
            print 'opt loaded from', '{}.json'.format(self.opt['load_model_file'])
            self.opt = json.load(
                open('{}.json'.format(self.opt['load_model_file'])))

    return builder, model, model_parameters, builder_hist, input_dim
def __init__(self):
    self.pc = dy.ParameterCollection()
    self.params = {}
    self.last_output = None
    self.with_bias = None
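# Self-contained sketch (names hypothetical) of the pattern this base class wraps:
# one ParameterCollection, a dict of named parameters, and a cached last_output.
import dynet as dy

pc = dy.ParameterCollection()
params = {"W": pc.add_parameters((4, 3)), "b": pc.add_parameters((4,))}

dy.renew_cg()
x = dy.inputVector([1.0, 2.0, 3.0])
last_output = dy.parameter(params["W"]) * x + dy.parameter(params["b"])
print(last_output.value())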
test_ins, test_act, test_init, test_id = load('test_final.json')
dev_gt_int = pd.read_csv('./dev_interaction_y.csv', index_col="id")['final_world_state'].values

# Dynet PART
## Initial SEQ2SEQ NETWORK
LAYERS = 1
INPUT_DIM = 50
char_DIM = 20
HIDDEN_DIM = 100
ATTENTION_DIM = HIDDEN_DIM
VOCAB_SIZE_input = len(vocab)
VOCAB_SIZE_out = len(vocab_out)
VOCAB_char = len(int2char)

pc = dy.ParameterCollection()
encoder = dy.CompactVanillaLSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
decoder = dy.CompactVanillaLSTMBuilder(LAYERS, INPUT_DIM + HIDDEN_DIM * 2, HIDDEN_DIM, pc)

params_encoder = {}
params_encoder["lookup"] = pc.add_lookup_parameters((VOCAB_SIZE_input, INPUT_DIM))

params_decoder = {}
params_decoder["lookup"] = pc.add_lookup_parameters((VOCAB_SIZE_out, INPUT_DIM))
params_decoder["R"] = pc.add_parameters((VOCAB_SIZE_out, HIDDEN_DIM))
params_decoder["bias"] = pc.add_parameters((VOCAB_SIZE_out))
params_decoder["attention_w"] = pc.add_parameters((ATTENTION_DIM, HIDDEN_DIM))
params_decoder["attention_b"] = pc.add_parameters((ATTENTION_DIM))
params_decoder["attention_wc"] = pc.add_parameters((ATTENTION_DIM, HIDDEN_DIM))
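# Hedged sketch of how the encoder declared above might be run over one input id
# sequence; encode() and input_ids are illustrative names, not from the original
# script, and dy.renew_cg() is assumed to have been called for the current example.
def encode(input_ids):
    state = encoder.initial_state()
    embedded = [params_encoder["lookup"][i] for i in input_ids]
    return state.transduce(embedded)   # one HIDDEN_DIM vector per input position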
argparser.add_argument('--dev_fscore', required=True)
argparser.add_argument('--unsupervised', action="store_true")
argparser.add_argument('--use_bert', action="store_true")
args, extra_args = argparser.parse_known_args()
args.config_file = "configs/{}.cfg".format(args.model[:args.model.find('Parser') + 6])
config = Configurable(args.config_file, extra_args)

dyparams = dy.DynetParams()
# dyparams.from_args()
# dyparams.set_autobatch(True)
dyparams.set_random_seed(666)
dyparams.set_mem(5120)
dyparams.init()

model = dy.ParameterCollection()
model_path = config.load_model_path + args.model + "_dev={}".format(args.dev_fscore)
# model_path = config.load_model_path + "GNNParser2_50epoch"
[parser] = dy.load(model_path, model)
print("Loaded model from {}".format(model_path))

if args.use_bert:
    test_bert_embeddings = parser.vocab.load_bert_embeddings(config.test_bert_file)
    print('Loaded bert embeddings!')

testing_data = parser.vocab.gold_data_from_file(config.test_file)
print("Loaded testing data from {}".format(config.test_file))
def new_model():
    return dy.ParameterCollection()
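# Hypothetical usage of new_model(): register a parameter, take one training step,
# then checkpoint the whole collection and restore it. The file name is illustrative.
import dynet as dy

pc = new_model()
W = pc.add_parameters((2, 2))
trainer = dy.SimpleSGDTrainer(pc)

dy.renew_cg()
loss = dy.squared_norm(dy.parameter(W) * dy.inputVector([1.0, -1.0]))
loss.value()                 # run the forward pass
loss.backward()              # accumulate gradients
trainer.update()

pc.save("model.params")      # write every parameter in the collection
pc.populate("model.params")  # load them back into the same structure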
def create_computation_graph(num_lemmas, num_pos, num_dep, num_directions, num_relations,
                             wv=None, use_xy_embeddings=False, num_hidden_layers=0,
                             lemma_dimension=50):
    """
    Initialize the model
    :param num_lemmas Number of distinct lemmas
    :param num_pos Number of distinct part of speech tags
    :param num_dep Number of distinct dependency labels
    :param num_directions Number of distinct path directions (e.g. >,<)
    :param num_relations Number of classes (e.g. binary = 2)
    :param wv Pre-trained word embeddings file
    :param use_xy_embeddings Whether to concatenate x and y word embeddings to the network input
    :param num_hidden_layers The number of hidden layers for the term-pair classification network
    :param lemma_dimension The dimension of the lemma embeddings
    :return:
    """
    # model = Model() -- gives error? tried to fix by looking at dynet tutorial examples -- GB
    dy.renew_cg()  # Renew the computation graph.
    # Call this before building any new computation graph
    model = dy.ParameterCollection()  # ParameterCollection to hold the parameters

    network_input = LSTM_HIDDEN_DIM
    builder = dy.LSTMBuilder(NUM_LAYERS,
                             lemma_dimension + POS_DIM + DEP_DIM + DIR_DIM,
                             network_input, model)

    # Concatenate x and y
    if use_xy_embeddings:
        network_input += 2 * lemma_dimension

    # 'the optimal size of the hidden layer is usually between the size of the input and size of the output layers'
    hidden_dim = int((network_input + num_relations) / 2)

    model_parameters = {}
    if num_hidden_layers == 0:
        # model_parameters['W_cnn'] = model.add_parameters((1, WIN_SIZE, EMB_SIZE, FILTER_SIZE))  # cnn weights
        # model_parameters['b_cnn'] = model.add_parameters((FILTER_SIZE))  # cnn bias
        model_parameters['W1'] = model.add_parameters((num_relations, network_input))
        model_parameters['b1'] = model.add_parameters((num_relations, 1))
        # A ParameterCollection is a container for Parameters and LookupParameters.
        # dynet.Trainer objects take ParameterCollection objects that define which parameters are being trained.
    elif num_hidden_layers == 1:
        model_parameters['W1'] = model.add_parameters((hidden_dim, network_input))
        model_parameters['b1'] = model.add_parameters((hidden_dim, 1))
        model_parameters['W2'] = model.add_parameters((num_relations, hidden_dim))
        model_parameters['b2'] = model.add_parameters((num_relations, 1))
    else:
        raise ValueError('Only 0 or 1 hidden layers are supported')

    model_parameters['lemma_lookup'] = model.add_lookup_parameters((num_lemmas, lemma_dimension))
    # LookupParameters represents a table of parameters.
    # They are used to embed a set of discrete objects (e.g. word embeddings). These are sparsely updated.

    # Pre-trained word embeddings
    if wv is not None:
        model_parameters['lemma_lookup'].init_from_array(wv)

    model_parameters['pos_lookup'] = model.add_lookup_parameters((num_pos, POS_DIM))
    model_parameters['dep_lookup'] = model.add_lookup_parameters((num_dep, DEP_DIM))
    model_parameters['dir_lookup'] = model.add_lookup_parameters((num_directions, DIR_DIM))

    return builder, model, model_parameters
def create_computation_graph(num_lemmas, num_pos, num_dep, num_directions, num_relations,
                             wv=None, use_xy_embeddings=False, num_hidden_layers=0,
                             lemma_dimension=50):
    """
    Initialize the model
    :param num_lemmas Number of distinct lemmas
    :param num_pos Number of distinct part of speech tags
    :param num_dep Number of distinct dependency labels
    :param num_directions Number of distinct path directions (e.g. >,<)
    :param num_relations Number of classes (e.g. binary = 2)
    :param wv Pre-trained word embeddings file
    :param use_xy_embeddings Whether to concatenate x and y word embeddings to the network input
    :param num_hidden_layers The number of hidden layers for the term-pair classification network
    :param lemma_dimension The dimension of the lemma embeddings
    :return:
    """
    # model = Model() -- gives error? tried to fix by looking at dynet tutorial examples -- GB
    dy.renew_cg()
    model = dy.ParameterCollection()

    network_input = LSTM_HIDDEN_DIM
    builder = dy.LSTMBuilder(NUM_LAYERS,
                             lemma_dimension + POS_DIM + DEP_DIM + DIR_DIM,
                             network_input, model)

    # Concatenate x and y
    if use_xy_embeddings:
        network_input += 2 * lemma_dimension

    # 'the optimal size of the hidden layer is usually between the size of the input and size of the output layers'
    hidden_dim = int((network_input + num_relations) / 2)

    model_parameters = {}
    if num_hidden_layers == 0:
        model_parameters['W1'] = model.add_parameters((num_relations, network_input))
        model_parameters['b1'] = model.add_parameters((num_relations, 1))
    elif num_hidden_layers == 1:
        model_parameters['W1'] = model.add_parameters((hidden_dim, network_input))
        model_parameters['b1'] = model.add_parameters((hidden_dim, 1))
        model_parameters['W2'] = model.add_parameters((num_relations, hidden_dim))
        model_parameters['b2'] = model.add_parameters((num_relations, 1))
    else:
        raise ValueError('Only 0 or 1 hidden layers are supported')

    model_parameters['lemma_lookup'] = model.add_lookup_parameters((num_lemmas, lemma_dimension))

    # Pre-trained word embeddings
    if wv is not None:
        model_parameters['lemma_lookup'].init_from_array(wv)

    model_parameters['pos_lookup'] = model.add_lookup_parameters((num_pos, POS_DIM))
    model_parameters['dep_lookup'] = model.add_lookup_parameters((num_dep, DEP_DIM))
    model_parameters['dir_lookup'] = model.add_lookup_parameters((num_directions, DIR_DIM))

    return builder, model, model_parameters
def __init__(
    self,
    embed_size,
    word_hidden_size,
    training_file,
    dev_file,
    test_file,
    batch_size,
    model_file,
    lstm_feats,
    crf_feats,
    autoencoder,
    train_features,
    dev_features,
    test_features,
    testing,
    restart,
    feat_func,
):
    self.crf_feats = crf_feats
    self.lstm_feats = lstm_feats
    self.autoencoder = autoencoder
    self.embed_size = embed_size
    self.word_hidden_size = word_hidden_size
    self.model_file = model_file
    self.featsize = 0
    self.word_vocab = defaultdict(lambda: len(self.word_vocab))
    self.char_vocab = defaultdict(lambda: len(self.char_vocab))
    self.tag_vocab = defaultdict(lambda: len(self.tag_vocab))
    self.word_lookup = []
    self.training_data = self.read_train(training_file, train_features)
    self.dev_data = self.read_test(dev_file, dev_features)
    self.test_data = self.read_test(test_file, test_features)
    self.batch_size = batch_size
    self.reverse_tag_lookup = dict((v, k) for k, v in self.tag_vocab.items())
    self.reverse_word_lookup = dict((v, k) for k, v in self.word_vocab.items())

    self.model = dy.ParameterCollection()
    self.cnn = CNNModule(self.model, self.char_vocab)
    self.word_embeds = self.model.add_lookup_parameters(
        (len(self.word_vocab), embed_size)
    )
    arr = np.array(self.word_lookup)
    self.word_embeds.init_from_array(arr)
    self.word_lstm = dy.BiRNNBuilder(
        1,
        CNN_OUT_SIZE + embed_size + FEAT_OUT_SIZE,
        word_hidden_size,
        self.model,
        dy.LSTMBuilder,
    )
    self.feat_w = self.model.add_parameters((FEAT_OUT_SIZE, self.featsize))
    self.feat_b = self.model.add_parameters((FEAT_OUT_SIZE))
    self.feat_func = feat_func

    num_tags = len(self.tag_vocab) + 2
    self.num_tags = num_tags
    # Last linear layer to map the output of the LSTM to the tag space
    self.context_to_emit_w = self.model.add_parameters(
        (len(self.tag_vocab), word_hidden_size + FEAT_OUT_SIZE)
    )
    self.context_to_emit_b = self.model.add_parameters((len(self.tag_vocab)))
    self.crf_module = CRFModule(self.model, self.tag_vocab)
    self.o_tag = self.tag_vocab["O"]
    self.context_to_trans_w = self.model.add_parameters(
        (num_tags * num_tags, word_hidden_size + FEAT_OUT_SIZE)
    )
    self.context_to_trans_b = self.model.add_parameters((num_tags * num_tags))
    self.feat_reconstruct_w = self.model.add_parameters(
        (self.featsize, word_hidden_size)
    )
    self.feat_reconstruct_b = self.model.add_parameters((self.featsize))

    if DROPOUT > 0.0:
        self.word_lstm.set_dropout(DROPOUT)

    if os.path.exists(self.model_file) and (testing or restart):
        self.model.populate(self.model_file)
        print("Populated!")
        v_acc = self.get_accuracy(self.dev_data, print_out="dev.")
        print("Validation F1: %f\n" % v_acc)
def __init__(
    self,
    src1_vocab,
    src2_vocab,
    tgt_vocab,
    single,
    pointer_gen,
    coverage,
    diag_loss,
    load_model,
    model_file,
    beam_size,
    best_val_cer,
):
    self.model = dy.ParameterCollection()
    self.src1_vocab = src1_vocab
    self.src2_vocab = src2_vocab
    self.tgt_vocab = tgt_vocab

    self.src1_lookup = self.model.add_lookup_parameters(
        (src1_vocab.length(), EMBEDDING_DIM)
    )
    self.src2_lookup = self.model.add_lookup_parameters(
        (src2_vocab.length(), EMBEDDING_DIM)
    )
    self.tgt_lookup = self.model.add_lookup_parameters(
        (tgt_vocab.length(), EMBEDDING_DIM)
    )

    self.enc1_fwd_lstm = dy.CoupledLSTMBuilder(
        LSTM_NUM_OF_LAYERS, EMBEDDING_DIM, HIDDEN_DIM, self.model
    )
    self.enc1_bwd_lstm = dy.CoupledLSTMBuilder(
        LSTM_NUM_OF_LAYERS, EMBEDDING_DIM, HIDDEN_DIM, self.model
    )
    self.pret1_w = self.model.add_parameters((src1_vocab.length(), HIDDEN_DIM))
    self.pret1_b = self.model.add_parameters((src1_vocab.length()))

    self.enc2_fwd_lstm = dy.CoupledLSTMBuilder(
        LSTM_NUM_OF_LAYERS, EMBEDDING_DIM, HIDDEN_DIM, self.model
    )
    self.enc2_bwd_lstm = dy.CoupledLSTMBuilder(
        LSTM_NUM_OF_LAYERS, EMBEDDING_DIM, HIDDEN_DIM, self.model
    )
    self.pret2_w = self.model.add_parameters((src2_vocab.length(), HIDDEN_DIM))
    self.pret2_b = self.model.add_parameters((src2_vocab.length()))

    self.att1_w1 = self.model.add_parameters((ATTENTION_SIZE, HIDDEN_DIM * 2))
    self.att1_w2 = self.model.add_parameters(
        (ATTENTION_SIZE, HIDDEN_DIM * LSTM_NUM_OF_LAYERS * 2)
    )
    self.att1_v = self.model.add_parameters((1, ATTENTION_SIZE))
    self.att2_w1 = self.model.add_parameters((ATTENTION_SIZE, HIDDEN_DIM * 2))
    self.att2_w2 = self.model.add_parameters(
        (ATTENTION_SIZE, HIDDEN_DIM * LSTM_NUM_OF_LAYERS * 2)
    )
    self.att2_v = self.model.add_parameters((1, ATTENTION_SIZE))

    self.dec_lstm = dy.CoupledLSTMBuilder(
        LSTM_NUM_OF_LAYERS, HIDDEN_DIM * 4 + EMBEDDING_DIM, HIDDEN_DIM, self.model
    )
    self.W_s = self.model.add_parameters((HIDDEN_DIM, HIDDEN_DIM * 4))
    self.b_s = self.model.add_parameters((HIDDEN_DIM))
    self.dec_w = self.model.add_parameters((tgt_vocab.length(), HIDDEN_DIM))
    self.dec_b = self.model.add_parameters((tgt_vocab.length()))

    # Pointer-generator parameters
    self.ptr_w_c = self.model.add_parameters((1, 2 * HIDDEN_DIM))
    self.ptr_w_s = self.model.add_parameters((1, 2 * HIDDEN_DIM))
    self.ptr_w_x = self.model.add_parameters((1, EMBEDDING_DIM + 4 * HIDDEN_DIM))

    # Coverage parameters
    self.w_cov = self.model.add_parameters((ATTENTION_SIZE, 1))

    self.single_source = single
    self.pointer_gen = pointer_gen
    self.coverage = coverage
    self.diag_loss = diag_loss
    self.model_file = model_file
    if load_model:
        self.model.populate(load_model)
        logging.info("Loaded model: {}".format(load_model))
    self.beam_size = beam_size
    self.best_val_cer = best_val_cer
def __init__(self, in_dim, h_dim, c_in_dim, c_h_dim, h_layers, pred_layer,
             learning_algo="sgd", learning_rate=0, embeds_file=None,
             activation=ACTIVATION_MAP["tanh"], mlp=0,
             activation_mlp=ACTIVATION_MAP["rectify"], backprob_embeds=True,
             noise_sigma=0.1, w_dropout_rate=0.25, c_dropout_rate=0.25,
             initializer=INITIALIZER_MAP["glorot"], builder=BUILDERS["lstmc"],
             crf=False, viterbi_loss=False, mimickx_model_path=None,
             dictionary=None, type_constraint=False, lex_dim=0, embed_lex=False):
    self.w2i = {}  # word to index mapping
    self.c2i = {}  # char to index mapping
    self.w2c_cache = {}  # word to char index cache for frequent words
    self.wcount = None  # word count
    self.ccount = None  # char count
    self.task2tag2idx = {}  # need one dictionary per task
    self.pred_layer = [int(layer) for layer in pred_layer]  # at which layer to predict each task
    self.model = dynet.ParameterCollection()  # init model
    self.in_dim = in_dim
    self.h_dim = h_dim
    self.c_in_dim = c_in_dim
    self.c_h_dim = c_h_dim
    self.w_dropout_rate = w_dropout_rate
    self.c_dropout_rate = c_dropout_rate
    self.activation = activation
    self.mlp = mlp
    self.activation_mlp = activation_mlp
    self.noise_sigma = noise_sigma
    self.h_layers = h_layers
    self.predictors = {
        "inner": [],
        "output_layers_dict": {},
        "task_expected_at": {}
    }  # the inner layers and predictors
    self.wembeds = None  # lookup: embeddings for words
    self.cembeds = None  # lookup: embeddings for characters
    self.lembeds = None  # lookup: embeddings for lexical features (optional)
    self.embeds_file = embeds_file
    trainer_algo = TRAINER_MAP[learning_algo]
    if learning_rate > 0:
        ### TODO: better handling of additional learning-specific parameters
        self.trainer = trainer_algo(self.model, learning_rate=learning_rate)
    else:
        # using default learning rate
        self.trainer = trainer_algo(self.model)
    self.backprob_embeds = backprob_embeds
    self.initializer = initializer
    self.char_rnn = None  # biRNN for character input
    self.builder = builder  # default biRNN is an LSTM
    self.crf = crf
    self.viterbi_loss = viterbi_loss
    self.mimickx_model_path = mimickx_model_path
    if mimickx_model_path:  # load
        self.mimickx_model = load_model(mimickx_model_path)
    self.dictionary = None
    self.type_constraint = type_constraint
    self.embed_lex = False
    self.l2i = {UNK: 0}  # lex feature to index mapping
    if dictionary:
        self.dictionary, self.dictionary_values = load_dict(dictionary)
        self.path_to_dictionary = dictionary
        if type_constraint:
            self.lex_dim = 0
        else:
            if embed_lex:
                self.lex_dim = lex_dim
                self.embed_lex = True
                print("Embed lexical features")
                # register property indices
                for prop in self.dictionary_values:
                    self.l2i[prop] = len(self.l2i)
            else:
                self.lex_dim = len(self.dictionary_values)  # n-hot encoding
            print("Lex_dim: {}".format(self.lex_dim), file=sys.stderr)
    else:
        self.dictionary = None
        self.path_to_dictionary = None
        self.lex_dim = 0
def build_model(input_vocabulary, output_vocabulary, input_dim, hidden_dim, layers):
    # define all model parameters
    # TODO: add logic for "smart" parameter allocation according to the user's chosen architecture
    print 'creating model...'
    sys.stdout.flush()

    model = dn.ParameterCollection()
    params = {}

    # input embeddings
    params['input_lookup'] = model.add_lookup_parameters(
        (len(input_vocabulary), input_dim))

    # init vector for input feeding
    params['init_lookup'] = model.add_lookup_parameters((1, 3 * hidden_dim))

    # output embeddings
    params['output_lookup'] = model.add_lookup_parameters(
        (len(output_vocabulary), input_dim))

    # used in softmax output
    params['readout'] = model.add_parameters(
        (len(output_vocabulary), 3 * hidden_dim))
    params['bias'] = model.add_parameters(len(output_vocabulary))

    # rnn's
    if bool(arguments['--compact']):
        params['encoder_frnn'] = dn.CompactVanillaLSTMBuilder(layers, input_dim, hidden_dim, model)
        params['encoder_rrnn'] = dn.CompactVanillaLSTMBuilder(layers, input_dim, hidden_dim, model)
    else:
        params['encoder_frnn'] = dn.LSTMBuilder(layers, input_dim, hidden_dim, model)
        params['encoder_rrnn'] = dn.LSTMBuilder(layers, input_dim, hidden_dim, model)

    # attention MLPs - Luong-style with extra v_a from Bahdanau
    # concatenation layer for h (hidden dim), c (2 * hidden_dim)
    params['w_c'] = model.add_parameters((3 * hidden_dim, 3 * hidden_dim))

    # concatenation layer for h_input (hidden_dim), h_output (hidden_dim)
    params['w_a'] = model.add_parameters((hidden_dim, hidden_dim))

    # concatenation layer for h (hidden dim), c (2 * hidden_dim)
    params['u_a'] = model.add_parameters((hidden_dim, 2 * hidden_dim))

    # concatenation layer for h_input (2 * hidden_dim), h_output (hidden_dim)
    params['v_a'] = model.add_parameters((1, hidden_dim))

    # 3 * HIDDEN_DIM + input_dim - gets the feedback output embedding, "input feeding" approach for attn
    params['decoder_rnn'] = dn.LSTMBuilder(layers, 3 * hidden_dim + input_dim, hidden_dim, model)

    print 'finished creating model'
    sys.stdout.flush()

    return model, params