def __init__(self, data, opt):
    self.opt = opt
    self.model = dy.ParameterCollection()
    self.trainer = dy.MomentumSGDTrainer(self.model)
    self.w2i = data.w2i
    self.wdims = opt.embedding_size
    self.ldims = opt.hidden_size
    self.attsize = opt.attention_size
    self.ext_embeddings = data.ext_embeddings

    # Model Parameters
    self.wlookup = self.model.add_lookup_parameters((len(self.w2i), self.wdims))
    self.__load_external_embeddings()

    if self.opt.encoder_dir == "single":
        if self.opt.encoder_type == "lstm":
            self.sentence_rnn = [dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model)]
        elif self.opt.encoder_type == "gru":
            self.sentence_rnn = [dy.GRUBuilder(1, self.wdims, self.ldims, self.model)]
        self.attention_w = self.model.add_parameters((self.attsize, self.ldims))
        self.attention_b = self.model.add_parameters(self.attsize)
        self.att_context = self.model.add_parameters(self.attsize)
        self.mlp_w = self.model.add_parameters((1, self.ldims + 2 * self.ldims))
        self.mlp_b = self.model.add_parameters(1)
    elif self.opt.encoder_dir == "bidirectional":
        if self.opt.encoder_type == "lstm":
            self.sentence_rnn = [
                dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model),
                dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model),
            ]
        elif self.opt.encoder_type == "gru":
            self.sentence_rnn = [
                dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
            ]
        self.attention_w = self.model.add_parameters((self.attsize, 2 * self.ldims))
        self.attention_b = self.model.add_parameters(self.attsize)
        self.att_context = self.model.add_parameters(self.attsize)
        self.mlp_w = self.model.add_parameters((1, 2 * self.ldims + 4 * self.ldims))
        self.mlp_b = self.model.add_parameters(1)
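The shapes of attention_w, attention_b and att_context above suggest additive attention with a learned context vector over the encoder states. A minimal, illustrative sketch of how such parameters are typically applied; the method name and the hs argument are assumptions, not part of the original class:

def sketch_attention_pooling(self, hs):
    # hs: list of encoder state expressions (ldims, or 2*ldims when bidirectional)
    W = dy.parameter(self.attention_w)
    b = dy.parameter(self.attention_b)
    u = dy.parameter(self.att_context)
    scores = [dy.dot_product(u, dy.tanh(W * h + b)) for h in hs]
    weights = dy.softmax(dy.concatenate(scores))
    # weighted sum of states: (dim, n) matrix of states times (n,) weight vector
    return dy.concatenate_cols(hs) * weights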
def load_and_populate(self, model_path):
    self.m = dy.ParameterCollection()
    with open(os.path.join(model_path, "info.pickle"), "rb") as inf:
        (self._w2i, self._c2i, self._jamo2i, self.arch, self.loss_type,
         self.zsize, self.wdim, self.cdim, self.jdim, self.width,
         self.swap, self.pseudocount) = pickle.load(inf)
    self.init_parameters(wembs={})
    self.m.populate(os.path.join(model_path, "model"))
def __init__(self, rnn_num_of_layers, embeddings_size, state_size, vocab_size, char2int, int2char):
    self.model = dy.ParameterCollection()
    # the embedding parameters
    self.embeddings = self.model.add_lookup_parameters((vocab_size, embeddings_size))
    # the rnn
    self.RNN = RNN_BUILDER(rnn_num_of_layers, embeddings_size, state_size, self.model)
    # project the rnn output to a vector of VOCAB_SIZE length
    self.output_w = self.model.add_parameters((vocab_size, state_size))
    self.output_b = self.model.add_parameters((vocab_size))
    self.int2char = int2char
    self.char2int = char2int
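A minimal usage sketch (not part of the original class) of how the lookup table, RNN builder and output projection declared above are typically combined to score one character sequence; the function name is hypothetical:

def sketch_sequence_loss(lm, chars):
    dy.renew_cg()
    W = dy.parameter(lm.output_w)
    b = dy.parameter(lm.output_b)
    state = lm.RNN.initial_state()
    losses = []
    # predict each next character from the state after reading the current one
    for curr_char, next_char in zip(chars, chars[1:]):
        state = state.add_input(lm.embeddings[lm.char2int[curr_char]])
        scores = W * state.output() + b
        losses.append(dy.pickneglogsoftmax(scores, lm.char2int[next_char]))
    return dy.esum(losses)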
def __init__(self, char_dim, feat_dim, hidden_dim, char_size, feat_sizes):
    self._char_dim = char_dim
    self._feat_dim = feat_dim
    self._pc = dy.ParameterCollection()
    if config.adam:
        self._trainer = dy.AdamTrainer(self._pc, config.learning_rate, config.beta_1,
                                       config.beta_2, config.epsilon)
    else:
        # self._trainer = dy.AdadeltaTrainer(self._pc)
        self._trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate)
        self._trainer.set_clip_threshold(config.clip_threshold)
    # self._trainer.set_clip_threshold(1.0)

    self.params = dict()
    self.lp_c = self._pc.add_lookup_parameters((char_size, char_dim))
    self.lp_feats = []
    for idx in range(len(feat_sizes)):
        self.lp_feats.append(self._pc.add_lookup_parameters((feat_sizes[idx], feat_dim),
                                                            init=dy.ConstInitializer(0.)))
    # self._pdrop_embs = pdrop_embs
    # self._pdrop_lstm = pdrop_lstm
    # self._pdrop_mlp = pdrop_mlp

    self.LSTM_builders = []
    f = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)
    b = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)
    self.LSTM_builders.append((f, b))
    for i in range(config.layers - 1):
        f = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
        b = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
        self.LSTM_builders.append((f, b))

    self.dec_LSTM = dy.VanillaLSTMBuilder(1, hidden_dim, hidden_dim, self._pc)
    self.MLP = self._pc.add_parameters((char_dim + feat_dim * 6 + 6, hidden_dim))
    self.MLP_bias = self._pc.add_parameters((hidden_dim))
    self.classifier = self._pc.add_parameters((hidden_dim, char_size))
    self.classifier_bias = self._pc.add_parameters((char_size))
    self.MLP_attn = self._pc.add_parameters((char_dim + feat_dim * 6 + 6, hidden_dim))
    self.MLP_attn_bias = self._pc.add_parameters((hidden_dim))
    self.attn_weight = self._pc.add_parameters((char_dim))
def __init__(self, alphabet, num_layers=2, input_dim=3, hidden_dim=5,
             reject_threshold=0.01, RNNClass=LSTMNetwork, pc=None):
    self.alphabet = alphabet
    # kept as a separate name for historical reasons; identical to self.alphabet
    self.input_alphabet = list(self.alphabet)
    self.end_token = "<EOS>"
    # self.begin_token = "<BOS>"  # if <BOS> is reintroduced, prepend it to every input
    # sequence and treat the state after <BOS> as the first state
    self.internal_alphabet = self.input_alphabet + [self.end_token]  # + [self.begin_token]
    self.int2char = list(self.internal_alphabet)
    self.char2int = {c: i for i, c in enumerate(self.int2char)}
    self.vocab_size = len(self.internal_alphabet)
    self.pc = pc if pc is not None else dy.ParameterCollection()
    self.lookup = self.pc.add_lookup_parameters((self.vocab_size, input_dim))
    self.linear_transform = LinearTransform(hidden_dim, self.vocab_size, self.pc)
    self.rnn = RNNClass(num_layers=num_layers, input_dim=input_dim,
                        hidden_dim=hidden_dim, pc=self.pc)
    self.reject_threshold = reject_threshold
    self.store_expressions()  # gets the initial state started, which enables the following:
    full_hidden_vec = self.rnn.initial_state.as_vec()
    # not a real state: LSTM and GRU hidden values lie in [-1, 1], so 2 is out of range
    self.sink_reject_vec = [2 for a in full_hidden_vec]
    self.all_losses = []
    self.spec = {
        "alphabet": alphabet,
        "input_dim": input_dim,
        "num_layers": num_layers,
        "hidden_dim": hidden_dim,
        "reject_threshold": reject_threshold,
        "RNNClass": RNNClass
    }
def entailment(train_file, dev_file, test_file, embed_file, epochs, eps, reg_lambda,
               batch_size, per_log, LSTM_params, training_sample, sample_type, improvement):
    curr_time = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    print(curr_time + ": starting process")

    # read the train, dev and test sets into lists; each list item is a sentence, represented as a tuple
    train, train_words, max_len_train = read_data(train_file)
    dev, dev_words, max_len_dev = read_data(dev_file)
    test, test_words, max_len_test = read_data(test_file)
    P_rows = max([max_len_train, max_len_dev, max_len_test])

    # unify all unique words into one set and delete the separate sets
    all_words = train_words.union(dev_words).union(test_words)
    del train_words
    del dev_words
    del test_words

    # get embeddings
    embed_vec, vocab = get_embeddings(embed_file, all_words, LSTM_params[2])

    # define vocabulary and helper structures
    word2int = {w: i for i, w in enumerate(vocab)}
    label2int = {l: i for i, l in enumerate(["entailment", "neutral", "contradiction"])}
    vocab_size = len(vocab)
    num_labels = 3

    # create a classifier
    m = dy.ParameterCollection()
    trainer = dy.AdadeltaTrainer(m, eps)  # define trainer
    snli_classifier = ReRead_LSTM(vocab_size, num_labels, LSTM_params, embed_vec,
                                  P_rows, m, improvement)  # create classifier
    train_model(train, dev, test, epochs, batch_size, reg_lambda, trainer, snli_classifier,
                word2int, label2int, per_log, training_sample, sample_type, improvement)
def _xavier_initializer(shape, **kwargs):
    """Defines an initializer for the Xavier distribution.
    Specifically, the output should be sampled uniformly from [-epsilon, epsilon] where
        epsilon = sqrt(6 / <sum of the sizes of shape's dimensions>)
    e.g., if shape = (2, 3), epsilon = sqrt(6 / (2 + 3))

    This function will be used as a variable initializer.

    Args:
        shape: Tuple or 1-d array that specifies the dimensions of the requested tensor.
    Returns:
        out: numpy array of the specified shape sampled from the Xavier distribution.
    """
    ### YOUR CODE HERE
    epsilon = np.sqrt(6.0 / np.sum(shape))
    out = np.random.uniform(low=-epsilon, high=epsilon, size=shape)
    ### END YOUR CODE
    return out
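A quick, illustrative check of the formula above (not from the original code; assumes numpy is imported as np):

sample = _xavier_initializer((2, 3))
epsilon = np.sqrt(6.0 / (2 + 3))  # ~= 1.095
assert sample.shape == (2, 3)
assert np.all(np.abs(sample) <= epsilon)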
def __init__(self, w2i, options):
    print('Similarity Experiment - init')
    self.options = options
    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model)

    self.w2i = w2i
    self.wdims = options.wembedding_dims
    self.ldims = options.lstm_dims
    self.ext_embeddings = None

    # Model Parameters
    self.wlookup = self.model.add_lookup_parameters((len(w2i), self.wdims))
    self.__load_model()

    self.phrase_rnn = [dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model)]
    self.mlp_w = self.model.add_parameters((1, self.ldims))
    self.mlp_b = self.model.add_parameters(1)
def main():
    import sys
    action = sys.argv[1]
    emb_name = sys.argv[2]
    filen = sys.argv[3]
    testname = sys.argv[4]

    print 'setting up'
    train_words, train_tags, tag_idx, idx_tag, test_words, test_tags = setup(filen, testname)
    num_tags = len(tag_idx.keys())

    gru_model = dy.ParameterCollection()
    word_index, embeddings_mat = import_embeddings(emb_name, 300)
    hidden_layer_len = 200
    layers = 1
    eparams = gru_model.lookup_parameters_from_numpy(embeddings_mat.A)  # .A converts the numpy matrix to a plain ndarray
    gru_unit = dy.GRUBuilder(layers, 300, hidden_layer_len, gru_model)
    param_mat = gru_model.add_parameters((hidden_layer_len, num_tags))
    param_bias = gru_model.add_parameters((num_tags))

    # gmodel.save("grumodel.model")
    # mdl2 = dy.ParameterCollection()
    # ep = mdl2.lookup_parameters_from_numpy(embeddings_mat.A)
    # parmat = mdl2.add_parameters((200, num_tags))
    # parbias = mdl2.add_parameters((num_tags))
    # gmodel.populate("grumodel.model")

    if action == 'train':
        print 'training'
        bsize, gmodel = training(3, eparams, gru_unit, idx_tag, gru_model, train_words,
                                 train_tags, word_index, param_mat, param_bias)
    if action == 'tune':
        print 'tuning'
        for r in range(3, 6):
            print 'training'
            print 'epochs: ', r
            bsize, gmodel = training(r, eparams, gru_unit, idx_tag, gru_model, train_words,
                                     train_tags, word_index, param_mat, param_bias)
            print 'testing'
            testing(idx_tag, gru_unit, gmodel, bsize, test_words, test_tags, word_index,
                    param_mat, param_bias, eparams)
    if action == 'test':
        print 'training'
        # use 5 epochs
        bsize, gmodel = training(5, eparams, gru_unit, idx_tag, gru_model, train_words,
                                 train_tags, word_index, param_mat, param_bias)
        print 'testing'
        testing(idx_tag, gru_unit, gmodel, bsize, test_words, test_tags, word_index,
                param_mat, param_bias, eparams)
    return
def set_up_predictor(self, nmt_model_path):
    """Initializes the predictor with the given NMT model.
    """
    model_folder = nmt_model_path
    best_model_path = model_folder + '/bestmodel.txt'
    hypoparams_file = model_folder + '/best.dev'

    hypoparams_file_reader = codecs.open(hypoparams_file, 'r', 'utf-8')
    hyperparams_dict = dict([line.strip().split(' = ')
                             for line in hypoparams_file_reader.readlines()])
    self.hyperparams = {
        'INPUT_DIM': int(hyperparams_dict['INPUT_DIM']),
        'HIDDEN_DIM': int(hyperparams_dict['HIDDEN_DIM']),
        #'FEAT_INPUT_DIM': int(hyperparams_dict['FEAT_INPUT_DIM']),
        'LAYERS': int(hyperparams_dict['LAYERS']),
        'VOCAB_PATH': hyperparams_dict['VOCAB_PATH']
    }
    self.pc = dy.ParameterCollection()

    # print 'Loading vocabulary from {}:'.format(self.hyperparams['VOCAB_PATH'])
    # self.vocab = Vocab.from_file(self.hyperparams['VOCAB_PATH'])
    # BEGIN_CHAR = u'<s>'
    # STOP_CHAR = u'</s>'
    # UNK_CHAR = u'<unk>'
    # self.BEGIN = self.vocab.w2i[BEGIN_CHAR]
    # self.STOP = self.vocab.w2i[STOP_CHAR]
    # self.UNK = self.vocab.w2i[UNK_CHAR]
    self.BEGIN = utils.GO_ID
    self.STOP = utils.EOS_ID
    self.UNK = utils.UNK_ID
    # self.hyperparams['VOCAB_SIZE'] = self.vocab.size()

    print 'Model Hyperparameters:'
    for k, v in self.hyperparams.items():
        print '{:20} = {}'.format(k, v)
    print

    print 'Loading model from: {}'.format(best_model_path)
    (self.fbuffRNN, self.bbuffRNN, self.VOCAB_LOOKUP, self.decoder, self.R, self.bias,
     self.W_c, self.W__a, self.U__a, self.v__a) = dy.load(best_model_path, self.pc)
def __init__(self, in_dim, h_dim, c_in_dim, h_layers, pred_layer, embeds_file=None,
             activation=ACTIVATION_MAP["tanh"], mlp=0, activation_mlp=ACTIVATION_MAP["rectify"],
             backprob_embeds=True, noise_sigma=0.1, tasks_ids=[],
             initializer=INITIALIZER_MAP["glorot"], builder=BUILDERS["lstmc"], max_vocab_size=None):
    self.w2i = {}  # word to index mapping
    self.c2i = {}  # char to index mapping
    self.tasks_ids = tasks_ids  # list of names for each task
    self.task2tag2idx = {}  # need one dictionary per task
    self.pred_layer = [int(layer) for layer in pred_layer]  # at which layer to predict each task
    self.model = dynet.ParameterCollection()  # init model
    self.in_dim = in_dim
    self.h_dim = h_dim
    self.c_in_dim = c_in_dim
    self.activation = activation
    self.mlp = mlp
    self.activation_mlp = activation_mlp
    self.noise_sigma = noise_sigma
    self.h_layers = h_layers
    self.predictors = {
        "inner": [],
        "output_layers_dict": {},
        "task_expected_at": {}
    }  # the inner layers and predictors
    self.wembeds = None  # lookup: embeddings for words
    self.cembeds = None  # lookup: embeddings for characters
    self.embeds_file = embeds_file
    self.backprob_embeds = backprob_embeds
    self.initializer = initializer
    self.char_rnn = None  # biRNN for character input
    self.builder = builder  # default biRNN is an LSTM
    self.max_vocab_size = max_vocab_size
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input',
                        help='Input file containing saved parameters.')
    parser.add_argument('--hidden-units', type=int, default=256,
                        help='Number of hidden units used in the LSTM controller.')
    parser.add_argument('--source-alphabet-size', type=int, default=128,
                        help='Number of symbols to use in the source sequence.')
    parser.add_argument('--embedding-size', type=int, default=64,
                        help='Input embedding size.')
    parser.add_argument('--stack-embedding-size', type=int, default=256,
                        help='Size of vector values stored on the neural stack.')
    parser.add_argument('--test-length-range', type=parse_range, default=(65, 128),
                        help='Range of lengths of source sequences during testing.',
                        metavar='MIN,MAX')
    parser.add_argument('--test-data-size', type=int, default=1000,
                        help='Number of samples used in the test data.')
    args = parser.parse_args()

    params = dynet.ParameterCollection()
    builder = StackLSTMBuilder(params,
                               source_alphabet_size=args.source_alphabet_size,
                               embedding_size=args.embedding_size,
                               stack_embedding_size=args.stack_embedding_size,
                               hidden_units=args.hidden_units)
    params.populate(args.input)
    test(args, builder)
def main():
    print('Invoked as:', ' '.join(sys.argv), file=sys.stderr)
    parser = argparse.ArgumentParser()
    parser.add_argument('train_corpus')
    parser.add_argument('dev_corpus')
    parser.add_argument('--layers', type=int, default=1)
    parser.add_argument('--hidden_dim', type=int, default=128)
    parser.add_argument('--minibatch_size', type=int, default=1)
    parser.add_argument('--autobatch', action='store_true')
    parser.add_argument('--tied', action='store_true')
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--output', type=str, default='')
    harness.add_optimizer_args(parser)
    args = parser.parse_args()

    if args.output == '':
        args.output = '/tmp/model%d' % random.randint(0, 0xFFFF)
    print('Output file:', args.output, file=sys.stderr)

    action_vocab = Vocabulary()
    terminal_vocab = Vocabulary()
    rel_vocab = Vocabulary()
    train_corpus = read_corpus(args.train_corpus, action_vocab, terminal_vocab, rel_vocab)
    action_vocab.frozen = True
    terminal_vocab.frozen = True
    rel_vocab.frozen = True
    dev_corpus = read_corpus(args.dev_corpus, action_vocab, terminal_vocab, rel_vocab)
    print('Vocabulary sizes:', len(action_vocab), len(terminal_vocab), len(rel_vocab),
          file=sys.stderr)

    pc = dy.ParameterCollection()
    optimizer = harness.make_optimizer(args, pc)
    model = BottomUpDepLM(pc, action_vocab, len(terminal_vocab), len(rel_vocab),
                          args.layers, args.hidden_dim, False, args.tied)
    print('Total parameters:', pc.parameter_count(), file=sys.stderr)
    harness.train(model, train_corpus, dev_corpus, optimizer, args)
def __init__(self, vocab, options):
    random.seed(1)
    self.model = dy.ParameterCollection()
    self.trainer = helpers.get_trainer(options, self.model)
    self.get_violation = helpers.update_method(options)

    word_count = vocab.word_freq
    word_vocab = vocab.wordlookup_tbl
    pos_vocab = vocab.poslookup_tbl
    rel_vocab = vocab.rellookup_tbl
    self.rels = rel_vocab

    self.__enc = helpers.get_encoder(self.model, options, word_count, word_vocab, pos_vocab)
    self.__scr = helpers.get_scorer(self.model, options, rel_vocab)
    self.__tree_enc = TreeEncoder.get_tree_encoder(self.model, options, rel_vocab)
    self.__train_flag = True
    self.oracle = options.oracle
    self.exploration_rate = options.exploration_rate
def __init__(self, w2i, permissions, options):
    super().__init__(options)
    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model)

    self.w2i = w2i
    self.wdims = options.wembedding_dims
    self.ldims = options.lstm_dims
    self.all_permissions = permissions

    # Model Parameters
    self.wlookup = self.model.add_lookup_parameters((len(w2i), self.wdims))

    # RNNs
    self.sentence_rnn = [dy.SimpleRNNBuilder(1, self.wdims, self.ldims, self.model)]

    if options.external_embedding is not None:
        self.__load_external_embeddings()
def train(self, model_path, articles_X, articles_Y, dev_articles_X, dev_articles_Y, lrate, epochs):
    self.m = dy.ParameterCollection()
    self._prepare_model_directory(model_path)
    wseqs = [wseq for wseq_list in articles_X + articles_Y for wseq in wseq_list]
    self.build_dicts(wseqs, [[]])
    self.init_parameters()
    self._train_report(articles_X, articles_Y, lrate, epochs, 1)
    self.common.turn_on_training(0)  # No dropout

    best_hY_X = float("inf")
    trainer = dy.AdamTrainer(self.m, lrate)
    for epoch in xrange(epochs):
        self._log("Epoch {0:2d} ".format(epoch + 1), False)
        epoch_start_time = time.time()
        inds = [i for i in xrange(len(articles_Y))]
        random.shuffle(inds)
        avg_loss = 0.0
        for data_num, i in enumerate(inds):
            if (data_num + 1) % 100 == 0:
                print data_num + 1,
                sys.stdout.flush()
            loss = self.get_loss(articles_X[i], articles_Y[i])
            avg_loss += loss.value() / len(inds)
            loss.backward()
            trainer.update()
        self._log("updates: {0} ".format(len(inds)), False)
        self._log("avg_loss: {0:.2f} ".format(avg_loss), False)
        self._log("({0:.1f}s) ".format(time.time() - epoch_start_time), False)

        self.common.turn_off_training()
        hY_X = self.test(dev_articles_X, dev_articles_Y)
        self.common.turn_on_training(0)  # No dropout
        self._log("dev {0:.2f} ".format(hY_X), False)
        if hY_X < best_hY_X:
            best_hY_X = hY_X
            self._log("new best - saving ", False)
            self.save(model_path)
        self._log("")
def _create_model(self):
    self.logger.info('Creating the model...')
    model = dy.ParameterCollection()

    # context gru encoders
    c_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"], self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    c_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"], self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)

    # question gru encoders
    q_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"], self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    q_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"], self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)

    # embedding parameter
    lookup_params = model.add_lookup_parameters(
        (self.model_args["vocab_size"], self.model_args["gru_input_dim"]),
        dy.UniformInitializer(self.model_args["lookup_init_scale"]))
    unk_lookup_params = model.add_lookup_parameters(
        (self.model_args["number_of_unks"], self.model_args["gru_input_dim"]),
        dy.UniformInitializer(self.model_args["lookup_init_scale"]))

    self.logger.info('Done creating the model')
    model_parameters = {"c_fwdRnn": c_fwdRnn, "c_bwdRnn": c_bwdRnn,
                        "q_fwdRnn": q_fwdRnn, "q_bwdRnn": q_bwdRnn,
                        "lookup_params": lookup_params,
                        "unk_lookup_params": unk_lookup_params}
    return model, model_parameters
def allocate_params(self):
    """
    Allocates memory for the model parameters.
    """
    self.model = dy.ParameterCollection()
    self.word_embeddings = self.model.add_lookup_parameters(
        (self.lexicon.size(), self.word_embedding_size))
    self.rnn = dy.LSTMBuilder(
        2, self.word_embedding_size + self.char_embedding_size,
        self.hidden_size, self.model)
    self.char_rnn = CharRNNBuilder(self.char_embedding_size, self.char_embedding_size,
                                   self.charset, self.model)
    self.word_softmax = dy.ClassFactoredSoftmaxBuilder(
        self.hidden_size, self.brown_file, self.lexicon.words2i, self.model, bias=True)
def __init__(self, vocab, options):
    # import here so we don't load Dynet if just running parser.py --help for example
    from multilayer_perceptron import MLP
    from feature_extractor import FeatureExtractor
    global dy
    import dynet as dy

    global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
    LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {'tanh': dy.tanh, 'sigmoid': dy.logistic, 'relu': dy.rectify,
                        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))}
    self.activation = self.activations[options.activation]
    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k
    self.recursive_composition = options.use_recursive_composition
    # ugly hack: dimensions depend on the extended features
    self.nnvecs = ((1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)
                   + (1 if self.recursive_composition else 0))

    self.feature_extractor = FeatureExtractor(self.model, options, vocab, self.nnvecs)
    self.irels = self.feature_extractor.irels

    if options.no_bilstms > 0:
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    else:
        mlp_in_dims = options.lstm_input_size * self.nnvecs * (self.k + 1)

    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims, options.mlp_hidden_dims,
                             options.mlp_hidden2_dims, 4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims, options.mlp_hidden_dims,
                           options.mlp_hidden2_dims, 2 * len(self.irels) + 2, self.activation)
def __init__(self, src_vocab, tgt_vocab,
             src_emb_dim, tgt_emb_dim,
             enc_nlayers, enc_hidden_dim,  # encoder settings
             dec_nlayers, dec_hidden_dim,  # decoder settings
             att_dim, label_smoothing):
    # Model settings
    self.label_smoothing = label_smoothing
    # Contains all of the model's parameters
    self.pc = dy.ParameterCollection()
    # Vocabulary objects
    self.src_vocab = src_vocab
    self.tgt_vocab = tgt_vocab
    # Embeddings
    self.src_embeddings = self.pc.add_lookup_parameters(
        (len(self.src_vocab), src_emb_dim), name='src-embedding')
    self.tgt_embeddings = self.pc.add_lookup_parameters(
        (len(self.tgt_vocab), tgt_emb_dim), name='tgt-embedding')
    # Init encoder/decoder
    self.encoder = BiLSTMEncoder(self.pc, enc_nlayers, src_emb_dim, enc_hidden_dim)
    self.decoder = LSTMDecoder(self.pc, dec_nlayers, tgt_emb_dim, dec_hidden_dim,
                               len(tgt_vocab), enc_hidden_dim)
    # Init attention
    self.attention = MLPAttention(self.pc, att_dim, enc_hidden_dim, dec_hidden_dim)
    # For affine transform between last state of encoder and first state of decoder
    self.W_bridge = self.pc.add_parameters((dec_hidden_dim, enc_hidden_dim))  # TODO: make class?
    self.b_bridge = self.pc.add_parameters(dec_hidden_dim)
    # Softmax over tgt vocab
    self.W_sm = self.pc.add_parameters((len(tgt_vocab), dec_hidden_dim))
    self.b_sm = self.pc.add_parameters(len(tgt_vocab))
    # For storing matrix of encodings produced by encoder
    self.encodings = None
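An illustrative sketch (not from the original code) of how the bridge parameters above are commonly applied: projecting the encoder's final state into the decoder's initial state. The method name, the enc_final argument and the tanh nonlinearity are assumptions:

def sketch_bridge(self, enc_final):
    # enc_final: expression for the encoder's last hidden state (enc_hidden_dim,)
    W = dy.parameter(self.W_bridge)  # (dec_hidden_dim, enc_hidden_dim)
    b = dy.parameter(self.b_bridge)
    return dy.tanh(dy.affine_transform([b, W, enc_final]))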
def run_test2(args):
    model = dy.ParameterCollection()
    # [parser] = dy.load(args.model_path_base, model)
    [parser] = dy.load(
        "models/chartdyRBTC-model_addr_dytree_giga_0.4_200_1_chartdyRBTC_dytree_1_houseno_0_0_dev=0.90",
        model)
    test_chunk_insts = util.read_chunks(args.test_path, args.normal)

    # ftreelog = open(args.expname + '.test.predtree.txt', 'w', encoding='utf-8')
    ftreelog = open('aaa' + '.test.predtree.txt', 'w', encoding='utf-8')
    test_start_time = time.time()
    test_predicted = []
    test_gold = []
    for inst in test_chunk_insts:
        chunks = util.inst2chunks(inst)
        test_gold.append(chunks)

    for x, chunks in test_chunk_insts:
        dy.renew_cg()
        sentence = [(parse.XX, ch) for ch in x]
        predicted, _ = parser.parse(sentence)
        pred_tree = predicted.convert()
        ftreelog.write(pred_tree.linearize() + '\n')
        test_predicted.append(pred_tree.to_chunks())
    ftreelog.close()

    # test_fscore = evaluate.eval_chunks2(args.evalb_dir, test_gold, test_predicted,
    #                                     output_filename=args.expname + '.test.txt')  # evalb
    test_fscore = evaluate.eval_chunks2(args.evalb_dir, test_gold, test_predicted,
                                        output_filename='aaaabbbb' + '.test.txt')  # evalb
    print("test-fscore {} "
          "test-elapsed {} ".format(
              test_fscore,
              format_elapsed(test_start_time),
          ))
def train(self, allSentences, trainManager, devData, predictTrain):
    if not self.__parser.handlesNonProjectiveTrees():
        sentences = utils.filterNonProjective(allSentences)
        self.__logger.info("Filtered %i non-projective trees " % (len(allSentences) - len(sentences)))
    else:
        sentences = allSentences

    # fill all dictionaries
    self.__reprBuilder.readData(sentences)
    self.__labeler.readData(sentences)

    # initialize parameters
    self.__model = dynet.ParameterCollection()
    self.__initializeParameters()

    # for logging
    trainLogger = NNParserTrainLogger(predictTrain)
    self.__trainOnSentences(sentences, devData, trainManager, trainLogger, predictTrain)
def run_test(args):
    if not os.path.exists(args.experiment_directory):
        os.mkdir(args.experiment_directory)
    print("Loading test trees from {}...".format(args.input_file))
    test_treebank = trees.load_trees(args.input_file)
    test_tokenized_lines = parse_trees_to_string_lines(test_treebank)
    test_embeddings_file = compute_elmo_embeddings(
        test_tokenized_lines,
        os.path.join(args.experiment_directory, 'test_embeddings'))
    print("Loaded {:,} test examples.".format(len(test_treebank)))
    print("Loading model from {}...".format(args.model_path))
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path, model)
    print("Parsing test sentences...")
    check_performance(parser, test_treebank, test_embeddings_file, args)
def init_model_c(vocab, tag_set, trained_model):
    model = dy.ParameterCollection()
    params = {}
    TAGSET_SIZE = len(tag_set)
    VOCAB_SIZE = len(vocab)
    params["lookup"] = model.add_lookup_parameters(
        (VOCAB_SIZE, LSTM_INPUT_DIM), init='uniform',
        scale=(np.sqrt(6) / np.sqrt(LSTM_INPUT_DIM)))
    f1_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_STATE_DIM, model)
    b1_lstm = dy.LSTMBuilder(LSTM_LAYERS, LSTM_INPUT_DIM, LSTM_STATE_DIM, model)
    f2_lstm = dy.LSTMBuilder(LSTM_LAYERS, 2 * LSTM_STATE_DIM, LSTM_STATE_DIM, model)
    b2_lstm = dy.LSTMBuilder(LSTM_LAYERS, 2 * LSTM_STATE_DIM, LSTM_STATE_DIM, model)
    lstms = (f1_lstm, b1_lstm, f2_lstm, b2_lstm)
    params["w"] = model.add_parameters((TAGSET_SIZE, 2 * LSTM_STATE_DIM))
    params["bias"] = model.add_parameters((TAGSET_SIZE))
    model.populate(trained_model)
    trainer = dy.AdamTrainer(model)
    return (lstms, params, model, trainer)
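The four builders returned above form a two-layer BiLSTM. A minimal, illustrative sketch of how they are usually composed over a sequence of embedding expressions embs; the helper name is hypothetical:

def sketch_bilstm(lstms, embs):
    f1_lstm, b1_lstm, f2_lstm, b2_lstm = lstms
    # first layer: run forward and backward over the embeddings, concatenate per position
    f1 = f1_lstm.initial_state().transduce(embs)
    b1 = list(reversed(b1_lstm.initial_state().transduce(list(reversed(embs)))))
    layer1 = [dy.concatenate([f, b]) for f, b in zip(f1, b1)]
    # second layer: same pattern over the first layer's outputs
    f2 = f2_lstm.initial_state().transduce(layer1)
    b2 = list(reversed(b2_lstm.initial_state().transduce(list(reversed(layer1)))))
    return [dy.concatenate([f, b]) for f, b in zip(f2, b2)]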
def load_model(path, model_version):
    full_saving_path = os.path.join(path, model_version)
    new_model_obj = pickle.load(open(full_saving_path + ".p", "rb"))
    model_to_load = dy.ParameterCollection()
    W_emb, W_cnn, b_cnn, W_mlp, b_mlp, V_mlp, a_mlp = dy.load(full_saving_path, model_to_load)
    new_model_obj.W_emb = W_emb
    new_model_obj.W_cnn = W_cnn
    new_model_obj.b_cnn = b_cnn
    new_model_obj.W_mlp = W_mlp
    new_model_obj.b_mlp = b_mlp
    new_model_obj.V_mlp = V_mlp
    new_model_obj.a_mlp = a_mlp
    # rebuild the defaultdicts from the plain dicts that were pickled; pickle can only
    # save dict objects, not defaultdicts with lambda default factories
    new_model_obj.w2i = defaultdict(lambda: len(new_model_obj.w2i), new_model_obj.w2i)
    new_model_obj.t2i = defaultdict(lambda: len(new_model_obj.t2i), new_model_obj.t2i)
    new_model_obj.model = model_to_load
    return new_model_obj
def train(self, data, dev=None):
    self.m = dy.ParameterCollection()
    self.__init_params(data)
    self.__enable_lstm_dropout()
    self.__is_training = True

    if os.path.isfile(self.model_path):
        os.remove(self.model_path)
    if not os.path.exists(self.model_path):
        os.makedirs(self.model_path)

    trainer = dy.AdamTrainer(self.m, self.learning_rate)
    perf_best = 0.
    exists = False
    for epoch in xrange(self.epochs):
        inds = [i for i in xrange(len(data.seqs))]
        random.shuffle(inds)
        for i in inds:
            loss = self.get_loss(data.seqs[i])
            loss.backward()
            trainer.update()
        if dev:
            self.__is_training = False
            self.__disable_lstm_dropout()
            perf, _ = self.get_perf(dev)
            print "Epoch {0:d} F1: {1:.2f}".format(epoch + 1, perf),
            if perf > perf_best:
                perf_best = perf
                print 'new best - saving model',
                self.save()
                exists = True
            self.__is_training = True
            self.__enable_lstm_dropout()
            print
        else:
            self.save()

    if exists:
        m = Mention2Vec()
        m.load_and_populate(self.model_path)
        perf, _ = m.get_perf(dev)
        print "Best dev F1: {0:.2f}".format(perf)
def test_softmax_model():
    """Train softmax model for a number of steps."""
    config = Config()

    # Generate random data to train the model on
    np.random.seed(1234)
    inputs = np.random.rand(config.n_samples, config.n_features)
    labels = np.zeros((config.n_samples, config.n_classes), dtype=np.int32)
    labels[:, 1] = 1
    # for i in xrange(config.n_samples):
    #     labels[i, i % config.n_classes] = 1
    mini_batches = [[inputs[k:k + config.batch_size], labels[k:k + config.batch_size]]
                    for k in xrange(0, config.n_samples, config.batch_size)]

    m = dy.ParameterCollection()
    trainer = dy.SimpleSGDTrainer(m)
    trainer.learning_rate = config.lr
    net = SoftmaxModel(config, m)

    for epoch in range(config.n_epochs):
        start_time = time.time()
        for mini_batch in mini_batches:
            dy.renew_cg()
            losses = []
            for ix in xrange(config.batch_size):
                l = net.create_network_return_loss(
                    np.array(mini_batch[0][ix]).reshape(1, config.n_features),
                    np.array(mini_batch[1][ix]).reshape(1, config.n_classes))
                losses.append(l)
            loss = dy.esum(losses) / config.batch_size
            loss.forward()
            loss.backward()
            trainer.update()
        duration = time.time() - start_time
        print 'Epoch {:}: loss = {:.2f} ({:.3f} sec)'.format(epoch, loss.value(), duration)

    print loss.value()
    assert loss.value() < .5
    print "Basic (non-exhaustive) classifier tests pass"
def test_get_bilstm_all_update(self):
    pc = dy.ParameterCollection()
    trainer = dy.AdamTrainer(pc, 0.1)
    flstm = dy.LSTMBuilder(1, 1, 1, pc)
    blstm = dy.LSTMBuilder(1, 1, 1, pc)
    model = Model()
    common = CommonArchitecture(model)

    def make_inputs():
        return [dy.inputTensor([1.0]), dy.inputTensor([2.0]),
                dy.inputTensor([3.0]), dy.inputTensor([4.0])]

    def test(sqnorm_original_value, assert_equal):
        dy.renew_cg()
        inputs = make_inputs()
        avg = dy.average(common.get_bilstm_all(inputs, flstm, blstm))
        sqnorm = dy.squared_norm(avg)
        if assert_equal:
            self.assertAlmostEqual(sqnorm_original_value, sqnorm.value(), places=10)
        else:
            self.assertNotAlmostEqual(sqnorm_original_value, sqnorm.value(), places=10)

    inputs = make_inputs()
    avg = dy.average(common.get_bilstm_all(inputs, flstm, blstm, False))
    sqnorm = dy.squared_norm(avg)
    sqnorm_original_value = sqnorm.value()
    sqnorm.backward()
    trainer.update()  # Shouldn't update LSTMs.
    test(sqnorm_original_value, True)

    dy.renew_cg()
    inputs = make_inputs()
    avg = dy.average(common.get_bilstm_all(inputs, flstm, blstm))
    sqnorm = dy.squared_norm(avg)
    sqnorm.backward()
    trainer.update()  # Should update LSTMs.
    test(sqnorm_original_value, False)
def __init__(self, config, pretrained_embeddings, parser):
    self.config = config
    print len(pretrained_embeddings)
    self.m = dy.ParameterCollection()
    self.Initializer = dy.ConstInitializer(0.0)
    self.pW = self.m.add_parameters(
        (self.config.n_features * self.config.embed_size, self.config.hidden_size))
    self.pB1 = self.m.add_parameters((1, self.config.hidden_size), init=self.Initializer)
    self.pU = self.m.add_parameters((self.config.hidden_size, self.config.n_classes))
    self.pB2 = self.m.add_parameters((1, self.config.n_classes), init=self.Initializer)
    self.word_lookup = self.m.lookup_parameters_from_numpy(pretrained_embeddings)
    self.pos_lookup = self.m.add_lookup_parameters((self.config.n_pos, self.config.embed_size))
    self.trainer = dy.AdamTrainer(self.m)
def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.001):
    self._model = dy.ParameterCollection()
    self._input_dim = input_dim
    self._hidden_dim = hidden_dim
    self._output_dim = output_dim
    self._rnn = dy.SimpleRNNBuilder(self.LAYERS, self._input_dim, self._hidden_dim, self._model)
    # self._rnn.disable_dropout()
    self._W = self._model.add_parameters((self._output_dim, self._hidden_dim),
                                         init=dy.NormalInitializer())
    self._learning_rate = learning_rate
    self._trainer = dy.MomentumSGDTrainer(self._model, learning_rate=self._learning_rate)
    self._l2_param = 0.0006
    # self._l2_param = 0.0
    self._init_layers()