def build_model(self, pc, best_model_path):
    if best_model_path:
        print 'Loading model from: {}'.format(best_model_path)
        self.RNN, self.VOCAB_LOOKUP, self.R, self.bias = dy.load(best_model_path, pc)
    else:
        # LSTM
        self.RNN = dy.CoupledLSTMBuilder(self.hyperparams['LAYERS'],
                                         self.hyperparams['INPUT_DIM'],
                                         self.hyperparams['HIDDEN_DIM'],
                                         pc)
        # embedding lookups for vocabulary
        self.VOCAB_LOOKUP = pc.add_lookup_parameters((self.hyperparams['VOCAB_SIZE'], self.hyperparams['INPUT_DIM']))
        # softmax parameters
        self.R = pc.add_parameters((self.hyperparams['VOCAB_SIZE'], self.hyperparams['HIDDEN_DIM']))
        self.bias = pc.add_parameters(self.hyperparams['VOCAB_SIZE'])

    print 'Model dimensions:'
    print ' * VOCABULARY EMBEDDING LAYER: IN-DIM: {}, OUT-DIM: {}'.format(self.hyperparams['VOCAB_SIZE'], self.hyperparams['INPUT_DIM'])
    print
    print ' * LSTM: IN-DIM: {}, OUT-DIM: {}'.format(self.hyperparams['INPUT_DIM'], self.hyperparams['HIDDEN_DIM'])
    print '   LSTM has {} layer(s)'.format(self.hyperparams['LAYERS'])
    print
    print ' * SOFTMAX: IN-DIM: {}, OUT-DIM: {}'.format(self.hyperparams['HIDDEN_DIM'], self.hyperparams['VOCAB_SIZE'])
    print
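# --- Hedged usage sketch (not part of the original snippet above) ---
# A minimal illustration of how the parameters built in build_model() could be
# used to score the next token at each position of a sequence: embed the tokens,
# run the CoupledLSTMBuilder, and apply the softmax projection R/bias per hidden
# state. The method name `score_sequence` and the argument `token_ids` are
# assumptions added for this sketch.
def score_sequence(self, token_ids):
    dy.renew_cg()
    embeddings = [self.VOCAB_LOOKUP[t] for t in token_ids]
    hidden_states = self.RNN.initial_state().transduce(embeddings)
    R = dy.parameter(self.R)
    bias = dy.parameter(self.bias)
    # one distribution over the vocabulary per position
    return [dy.softmax(R * h + bias) for h in hidden_states]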
def __init__(self, params, source_alphabet_size, embedding_size, hidden_units, stack_embedding_size):
    input_size = source_alphabet_size + 2
    output_size = source_alphabet_size + 1
    self.stack_embedding_size = stack_embedding_size
    self.input_embeddings = params.add_lookup_parameters(
        (input_size, embedding_size), name='input-embeddings')
    self.output_embeddings = params.add_lookup_parameters(
        (output_size, embedding_size), name='output-embeddings')
    self.controller = dy.CoupledLSTMBuilder(
        1, embedding_size + stack_embedding_size, hidden_units, params)
    # Intentionally set the gain for the sigmoid layers low, since this
    # seems to work better
    gain = 0.5
    self.pop_strength_layer = add_layer(
        params, hidden_units, 1, sigmoid,
        weights_initializer=dy.GlorotInitializer(False, gain=gain),
        # Initialize the pop bias to -1 to allow information to propagate
        # through the stack
        bias_initializer=dy.ConstInitializer(-1.0),
        name='pop-strength')
    self.push_strength_layer = add_layer(
        params, hidden_units, 1, sigmoid,
        weights_initializer=dy.GlorotInitializer(False, gain=gain),
        bias_initializer=dy.GlorotInitializer(False, gain=gain),
        name='push-strength')
    self.push_value_layer = add_layer(
        params, hidden_units, stack_embedding_size, tanh, name='push-value')
    self.output_layer = combine_layers([
        add_layer(params, hidden_units, hidden_units, tanh, name='output'),
        # This adds an extra affine layer between the tanh and the softmax
        add_layer(params, hidden_units, output_size, linear, name='softmax')
    ])
def _init_params(self):
    """ Defines all model parameters. """
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)

    self.word_lookup = self.model.add_lookup_parameters(
        (self.num_words, self.word_embedding_size))
    # self.chars_lookup = self.model.add_lookup_parameters((self.num_chars, self.char_embedding_size))

    # word-level LSTMs
    self.word_lstm_input_size = self.word_embedding_size  # + 2 * self.char_embedding_size
    self.fwd_word_rnn = dy.CoupledLSTMBuilder(
        self.word_num_hidden_layers,   # number of layers
        self.word_lstm_input_size,     # input dimension
        self.word_hidden_output_size,  # output dimension
        self.model)
    self.bwd_word_rnn = dy.CoupledLSTMBuilder(self.word_num_hidden_layers,
                                              self.word_lstm_input_size,
                                              self.word_hidden_output_size,
                                              self.model)

    # char-level LSTMs
    # self.fwd_char_rnn = dy.CoupledLSTMBuilder(self.char_num_hidden_layers,
    #                                           self.char_embedding_size,
    #                                           self.char_embedding_size,
    #                                           self.model)
    # self.bwd_char_rnn = dy.CoupledLSTMBuilder(self.char_num_hidden_layers,
    #                                           self.char_embedding_size,
    #                                           self.char_embedding_size,
    #                                           self.model)

    # set variational dropout
    if self.word_dropout:
        self.fwd_word_rnn.set_dropout(0.2)
        self.bwd_word_rnn.set_dropout(0.2)
    # if self._char_dropout:
    #     self.fwd_char_rnn.set_dropout(0.2)
    #     self.bwd_char_rnn.set_dropout(0.2)

    self.softmax_weight = self.model.add_parameters(
        (self.num_labels, self.word_hidden_output_size * 2))
    self.softmax_bias = self.model.add_parameters((self.num_labels, ))
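# --- Hedged usage sketch (not part of the original snippet above) ---
# A minimal illustration of how the parameters defined in _init_params() could
# score one sentence: look up word embeddings, run the forward and backward
# CoupledLSTMBuilder instances, concatenate the two hidden sequences, and apply
# the softmax layer per token. The method name `_score_sentence` and the
# argument `word_ids` are assumptions added for this sketch.
def _score_sentence(self, word_ids):
    dy.renew_cg()
    embeddings = [self.word_lookup[w] for w in word_ids]
    fwd_states = self.fwd_word_rnn.initial_state().transduce(embeddings)
    bwd_states = self.bwd_word_rnn.initial_state().transduce(embeddings[::-1])[::-1]
    W = dy.parameter(self.softmax_weight)
    b = dy.parameter(self.softmax_bias)
    # one (num_labels,) score vector per token
    return [W * dy.concatenate([f, bw]) + b for f, bw in zip(fwd_states, bwd_states)]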
def build_model(self, pc, best_model_path):
    if best_model_path:
        print 'Loading model from: {}'.format(best_model_path)
        (self.fbuffRNN, self.bbuffRNN, self.VOCAB_LOOKUP, self.decoder, self.R,
         self.bias, self.W_c, self.W__a, self.U__a, self.v__a) = dy.load(best_model_path, pc)
    else:
        # BiLSTM for input
        self.fbuffRNN = dy.CoupledLSTMBuilder(self.hyperparams['LAYERS'], self.hyperparams['INPUT_DIM'], self.hyperparams['HIDDEN_DIM'], pc)
        self.bbuffRNN = dy.CoupledLSTMBuilder(self.hyperparams['LAYERS'], self.hyperparams['INPUT_DIM'], self.hyperparams['HIDDEN_DIM'], pc)
        # embedding lookups for vocabulary
        self.VOCAB_LOOKUP = pc.add_lookup_parameters((self.hyperparams['VOCAB_SIZE'], self.hyperparams['INPUT_DIM']))
        # decoder LSTM
        self.decoder = dy.CoupledLSTMBuilder(self.hyperparams['LAYERS'], self.hyperparams['INPUT_DIM'], self.hyperparams['HIDDEN_DIM'], pc)
        # softmax parameters
        self.R = pc.add_parameters((self.hyperparams['VOCAB_SIZE'], 3 * self.hyperparams['HIDDEN_DIM']))
        self.bias = pc.add_parameters(self.hyperparams['VOCAB_SIZE'])
        # attention MLPs - Luong-style with extra v_a from Bahdanau
        # concatenation layer for h (hidden_dim), c (2 * hidden_dim)
        self.W_c = pc.add_parameters((3 * self.hyperparams['HIDDEN_DIM'], 3 * self.hyperparams['HIDDEN_DIM']))
        # attention MLPs - Bahdanau-style
        # concatenation layer for h_input (2 * hidden_dim), h_output (hidden_dim)
        self.W__a = pc.add_parameters((self.hyperparams['HIDDEN_DIM'], self.hyperparams['HIDDEN_DIM']))
        # concatenation layer for h (hidden_dim), c (2 * hidden_dim)
        self.U__a = pc.add_parameters((self.hyperparams['HIDDEN_DIM'], 2 * self.hyperparams['HIDDEN_DIM']))
        # concatenation layer for h_input (2 * hidden_dim), h_output (hidden_dim)
        self.v__a = pc.add_parameters((1, self.hyperparams['HIDDEN_DIM']))

    print 'Model dimensions:'
    print ' * VOCABULARY EMBEDDING LAYER: IN-DIM: {}, OUT-DIM: {}'.format(self.hyperparams['VOCAB_SIZE'], self.hyperparams['INPUT_DIM'])
    print
    print ' * ENCODER biLSTM: IN-DIM: {}, OUT-DIM: {}'.format(2 * self.hyperparams['INPUT_DIM'], 2 * self.hyperparams['HIDDEN_DIM'])
    print ' * DECODER LSTM: IN-DIM: {}, OUT-DIM: {}'.format(self.hyperparams['INPUT_DIM'], self.hyperparams['HIDDEN_DIM'])
    print '   All LSTMs have {} layer(s)'.format(self.hyperparams['LAYERS'])
    print
    print ' * SOFTMAX: IN-DIM: {}, OUT-DIM: {}'.format(self.hyperparams['HIDDEN_DIM'], self.hyperparams['VOCAB_SIZE'])
    print
def initialize_graph(self, num_words=None, num_chars=None):
    """ build graph and link to parameters """
    num_words = num_words if num_words is not None else len(self.w2i)
    num_chars = num_chars if num_chars is not None else len(self.c2i)
    if num_words == 0 or num_chars == 0:
        raise ValueError('Word2id and char2id have to be loaded before '
                         'initializing the graph.')
    print('Initializing the graph...')

    # initialize the word embeddings and the parameters
    self.cembeds = None
    if self.embeds_file:
        print("loading embeddings", file=sys.stderr)
        embeddings, emb_dim = load_embeddings_file(self.embeds_file)
        assert (emb_dim == self.in_dim)
        num_words = len(
            set(embeddings.keys()).union(set(
                self.w2i.keys())))  # initialize all with embeddings
        # init model parameters and initialize them
        self.wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim),
            init=dynet.ConstInitializer(0.01),
            name="wembeds".encode("utf-8"))
        if self.c_in_dim > 0:
            self.cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim),
                init=dynet.ConstInitializer(0.01),
                name="cembeds".encode("utf-8"))

        init = 0
        l = len(embeddings.keys())
        for word in embeddings.keys():
            # for those words we have already in w2i, update vector, otherwise add to w2i (since we keep data as integers)
            if word in self.w2i:
                self.wembeds.init_row(self.w2i[word], embeddings[word])
            else:
                self.w2i[word] = len(self.w2i.keys())  # add new word
                self.wembeds.init_row(self.w2i[word], embeddings[word])
            init += 1
        print("initialized: {}".format(init), file=sys.stderr)
    else:
        self.wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim),
            init=dynet.ConstInitializer(0.01),
            name="wembeds".encode("utf-8"))
        if self.c_in_dim > 0:
            self.cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim),
                init=dynet.ConstInitializer(0.01),
                name="cembeds".encode("utf-8"))

    # make it more flexible to add number of layers as specified by parameter
    layers = []  # inner layers
    for layer_num in range(0, self.h_layers):
        if layer_num == 0:
            if self.c_in_dim > 0:
                f_builder = dynet.CoupledLSTMBuilder(
                    1, self.in_dim + self.c_in_dim * 2, self.h_dim,
                    self.model)  # in_dim: size of each layer
                b_builder = dynet.CoupledLSTMBuilder(
                    1, self.in_dim + self.c_in_dim * 2, self.h_dim, self.model)
            else:
                f_builder = dynet.CoupledLSTMBuilder(
                    1, self.in_dim, self.h_dim, self.model)
                b_builder = dynet.CoupledLSTMBuilder(
                    1, self.in_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(
                f_builder, b_builder))  # returns forward and backward sequence
        else:
            # add inner layers (if h_layers > 1)
            f_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            b_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))

    # store at which layer to predict task
    task_num_labels = len(self.tag2idx)
    output_layer = FFSequencePredictor(
        Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax))

    if self.c_in_dim > 0:
        self.char_rnn = BiRNNSequencePredictor(
            dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model),
            dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model))
    else:
        self.char_rnn = None

    self.predictors = dict()
    self.predictors["inner"] = layers
    self.predictors["output_layers_dict"] = output_layer
    self.predictors["task_expected_at"] = self.h_layers
def build_computation_graph(self, num_words, num_chars):
    """ build graph and link to parameters """
    # initialize the word embeddings and the parameters
    cembeds = None
    if self.embeds_file:
        print("loading embeddings", file=sys.stderr)
        embeddings, emb_dim = load_embeddings_file(self.embeds_file)
        assert(emb_dim == self.in_dim)
        num_words = len(set(embeddings.keys()).union(set(self.w2i.keys())))  # initialize all with embeddings
        # init model parameters and initialize them
        wembeds = self.model.add_lookup_parameters((num_words, self.in_dim), init=dynet.ConstInitializer(0.01))
        if self.c_in_dim > 0:
            cembeds = self.model.add_lookup_parameters((num_chars, self.c_in_dim), init=dynet.ConstInitializer(0.01))

        init = 0
        l = len(embeddings.keys())
        for word in embeddings.keys():
            # for those words we have already in w2i, update vector, otherwise add to w2i (since we keep data as integers)
            if word in self.w2i:
                wembeds.init_row(self.w2i[word], embeddings[word])
            else:
                self.w2i[word] = len(self.w2i.keys())  # add new word
                wembeds.init_row(self.w2i[word], embeddings[word])
            init += 1
        print("initialized: {}".format(init), file=sys.stderr)
    else:
        wembeds = self.model.add_lookup_parameters((num_words, self.in_dim), init=dynet.ConstInitializer(0.01))
        if self.c_in_dim > 0:
            cembeds = self.model.add_lookup_parameters((num_chars, self.c_in_dim), init=dynet.ConstInitializer(0.01))

    # make it more flexible to add number of layers as specified by parameter
    layers = []  # inner layers
    for layer_num in range(0, self.h_layers):
        if layer_num == 0:
            if self.c_in_dim > 0:
                f_builder = dynet.CoupledLSTMBuilder(1, self.in_dim + self.c_in_dim * 2, self.h_dim, self.model)  # in_dim: size of each layer
                b_builder = dynet.CoupledLSTMBuilder(1, self.in_dim + self.c_in_dim * 2, self.h_dim, self.model)
            else:
                f_builder = dynet.CoupledLSTMBuilder(1, self.in_dim, self.h_dim, self.model)
                b_builder = dynet.CoupledLSTMBuilder(1, self.in_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))  # returns forward and backward sequence
        else:
            # add inner layers (if h_layers > 1)
            f_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            b_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))

    # store at which layer to predict task
    task_num_labels = len(self.tag2idx)
    output_layer = FFSequencePredictor(Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax))

    if self.c_in_dim > 0:
        char_rnn = BiRNNSequencePredictor(dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model),
                                          dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model))
    else:
        char_rnn = None

    predictors = {}
    predictors["inner"] = layers
    predictors["output_layers_dict"] = output_layer
    predictors["task_expected_at"] = self.h_layers

    return predictors, char_rnn, wembeds, cembeds
def setUp(self):
    # create model
    self.m = dy.ParameterCollection()
    self.rnn = dy.CoupledLSTMBuilder(2, 10, 10, self.m)
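# --- Hedged usage sketch (not part of the original snippet above) ---
# A minimal test-style example of exercising the builder created in setUp():
# start a fresh computation graph, feed a few 10-dimensional input vectors
# through an initial state, and check the output dimension. The test name
# `test_transduce_shapes` is an assumption added for illustration.
def test_transduce_shapes(self):
    dy.renew_cg()
    state = self.rnn.initial_state()
    for _ in range(3):
        state = state.add_input(dy.inputVector([0.0] * 10))
    self.assertEqual(state.output().dim()[0], (10,))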
def add_parameters(self, dropout, lstm_size, optimizer, model_type, include_embeddings, gru=True):
    if model_type == "gru":
        self.encoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE, lstm_size, self.model)
        self.encoder_rnn.set_dropout(dropout)
        self.encoder_rnn2 = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE, lstm_size, self.model)
        self.encoder_rnn2.set_dropout(dropout)
        self.decoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE + lstm_size, lstm_size, self.model)
        self.decoder_rnn.set_dropout(dropout)
    else:
        self.encoder_rnn = dy.CoupledLSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE, lstm_size, self.model)
        self.encoder_rnn.set_dropout(dropout)
        self.encoder_rnn2 = dy.CoupledLSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE, lstm_size, self.model)
        self.encoder_rnn2.set_dropout(dropout)
        self.decoder_rnn = dy.CoupledLSTMBuilder(
            NUM_LAYERS, EMBEDDING_SIZE + lstm_size, lstm_size, self.model)
        self.decoder_rnn.set_dropout(dropout)

    global DROPOUT
    DROPOUT = dropout

    self.W1 = self.model.add_parameters((200, lstm_size))
    self.b1 = self.model.add_parameters((200, 1))
    self.W2 = self.model.add_parameters((100, 200))
    self.b2 = self.model.add_parameters((100, 1))
    self.W3 = self.model.add_parameters((len(self.C2I), 100))
    self.b3 = self.model.add_parameters((len(self.C2I), 1))
    self.W_query = self.model.add_parameters((lstm_size, lstm_size))
    self.W_key = self.model.add_parameters((lstm_size, lstm_size))
    self.W_val = self.model.add_parameters((lstm_size, lstm_size))
    self.W_att = self.model.add_parameters((1, EMBEDDING_SIZE))
    self.W_c_s = self.model.add_parameters((lstm_size, EMBEDDING_SIZE))
    self.W_direct = self.model.add_parameters((len(self.C2I), lstm_size))
    self.b_att = self.model.add_parameters((lstm_size, 1))
    self.b_direct = self.model.add_parameters((len(self.C2I), 1))
    self.E_lang = self.model.add_lookup_parameters((7, EMBEDDING_SIZE))
    self.latin_semantic_rep = {}
    # self.W_latin_embeddings = self.model.add_lookup_parameters((EMBEDDING_SIZE, 100))
    # self.W_latin_embeddings2 = self.model.add_lookup_parameters((EMBEDDING_SIZE, EMBEDDING_SIZE))

    # single if/elif chain so the "sgd" and "rms" choices are not overwritten
    # by the Adagrad fallback (the original used two separate if-chains)
    if optimizer == "sgd":
        self.trainer = dy.SimpleSGDTrainer(self.model)
    elif optimizer == "rms":
        self.trainer = dy.RMSPropTrainer(self.model)
    elif optimizer == "cyclic":
        self.trainer = dy.CyclicalSGDTrainer(self.model)
    elif optimizer == "adam":
        self.trainer = dy.AdamTrainer(self.model)
    else:
        self.trainer = dy.AdagradTrainer(self.model)
def process(options, args):
    """Do the processing..."""
    # sorry for the ugly global variables... it's research code...
    global train, dev, train_labelset_goldtags, dev_labelset_goldtags, test1, test2
    global vw, vt, vc, UNK, nwords, ntags, nchars, model, trainer
    global WORDS_LOOKUP, CHARS_LOOKUP, p_t1, pH, pO, fwdRNN, bwdRNN, cFwdRNN, cBwdRNN
    global DEVSET_EVAL_INTERVAL, SKIP_NON_RELEVANT, PATIENCE, BEST_DEV_F1, CHARACTER_THRESHOLD, BEST_MODEL, DUMMYTAGSET
    global CHAR_EMBEDDING_SIZE, WORD_EMBEDDING_SIZE, HIDDEN_OUTPUT_SIZE, STOP_LABELSET_EVAL_F1, NBR_OF_CLASSES, CLASSDIST
    global general_stats, dev_confusion

    MAX_EPOCHS = options.max_epochs
    DEVSET_EVAL_INTERVAL = 5000
    SKIP_NON_RELEVANT = False
    PATIENCE = 5
    BEST_DEV_F1 = 0.0
    MINIMUM_DEV_F1_SCORE = options.minimum_dev_f1_score
    BEST_DEV_F1_SCORE = 0
    CHARACTER_THRESHOLD = 5
    BEST_MODEL = None
    DUMMYTAGSET = set(['O'])
    CHAR_EMBEDDING_SIZE = 20
    WORD_EMBEDDING_SIZE = 64   # must be even number
    HIDDEN_OUTPUT_SIZE = 64    # must be even number
    STOP_LABELSET_EVAL_F1 = 0.7100
    NBR_OF_CLASSES = 20
    CLASSDIST = {
        "Allgemein": 14066,
        "Zugfahrt": 3583,
        "Sonstige_Unregelm\u00e4ssigkeiten": 3361,
        "Atmosph\u00e4re": 2135,
        "Sicherheit": 1140,
        "Ticketkauf": 1005,
        "Service_und_Kundenbetreuung": 670,
        "DB_App_und_Website": 570,
        "Informationen": 491,
        "Connectivity": 441,
        "Auslastung_und_Platzangebot": 431,
        "Komfort_und_Ausstattung": 214,
        "Gastronomisches_Angebot": 131,
        "Barrierefreiheit": 103,
        "Image": 93,
        "Reisen_mit_Kindern": 68,
        "Design": 60,
        "Toiletten": 56,
        "Gep\u00e4ck": 16
    }

    # some general stats on
    general_stats = defaultdict(Counter)
    dev_confusion = Counter()

    # format of files: each line is "word1/tag1 word2/tag2 ..."
    train_file = options.train_file
    dev_file = options.dev_file
    test1_file = options.test1_file
    test2_file = options.test2_file

    train = list(read(train_file))
    print >> sys.stderr, '#TRAINING SET SEQUENCE SIZE', len(train)
    print >> sys.stderr, '#TRAINING: NUMBER OF CLASSES', len(CLASSDIST)
    train_labelset_goldtags = seqlabel2labelset(train)

    dev = list(read(dev_file, dataset="DEV"))
    print >> sys.stderr, '#DEV SET SEQUENCE SIZE', len(dev)
    dev_labelset_goldtags = seqlabel2labelset(dev)
    output_dataset(dev, filename='gold' + '__' + 'devset' + '.tsv', meta={})

    test1 = list(read(test1_file, dataset="TEST"))
    print >> sys.stderr, '#TEST1 SET SEQUENCE SIZE', len(test1)
    output_dataset(test1, filename='gold' + '__' + 'test1set' + '.tsv', meta={})

    test2 = list(read(test2_file, dataset="TEST"))
    print >> sys.stderr, '#TEST2 SET SEQUENCE SIZE', len(test2)
    global words, tags, wc
    output_dataset(test2, filename='gold' + '__' + 'test2set' + '.tsv', meta={})

    words = []
    tags = []
    chars_counter = Counter()
    wc = Counter()
    for sent in train:
        for w, p in sent:
            words.append(w)
            tags.append(p)
            chars_counter.update(w)
            wc[w] += 1
    words.append("_UNK_")
    words.append("__D__")  # Dummy words for sentences without an explicit category

    for c in chars_counter.keys():
        if chars_counter[c] < CHARACTER_THRESHOLD:
            del chars_counter[c]
    chars = set(chars_counter)
    chars.add("<*>")

    vw = Vocab.from_corpus([words])
    vt = Vocab.from_corpus([tags])
    vc = Vocab.from_corpus([chars])
    UNK = vw.w2i["_UNK_"]

    nwords = vw.size()
    ntags = vt.size()
    nchars = vc.size()
    print >> sys.stderr, '# NUMBER OF DIFFERENT WORDS', nwords
    print >> sys.stderr, '# NUMBER OF DIFFERENT TAGS', ntags, vt.w2i
    print >> sys.stderr, '# NUMBER OF DIFFERENT CHARACTERS', nchars, vc.w2i

    for statistics in general_stats:
        print >> sys.stderr, '#STATISTICS'
        for k, c in general_stats[statistics].most_common():
            print >> sys.stderr, "%s\t%s\t%d" % (statistics, k, c)

    # DyNet Starts
    model = dy.Model()
    trainer = dy.AdamTrainer(model)
    #trainer = dy.AdadeltaTrainer(model)

    WORDS_LOOKUP = model.add_lookup_parameters((nwords, WORD_EMBEDDING_SIZE))
    CHARS_LOOKUP = model.add_lookup_parameters((nchars, CHAR_EMBEDDING_SIZE))
    p_t1 = model.add_lookup_parameters((ntags, NBR_OF_CLASSES))

    # MLP on top of biLSTM outputs 2*HIDDEN_OUTPUT_SIZE -> HIDDEN_OUTPUT_SIZE -> ntags
    pH = model.add_parameters((HIDDEN_OUTPUT_SIZE, HIDDEN_OUTPUT_SIZE * 2))
    pO = model.add_parameters((ntags, HIDDEN_OUTPUT_SIZE))

    # word-level LSTMs
    fwdRNN = dy.CoupledLSTMBuilder(2, WORD_EMBEDDING_SIZE, HIDDEN_OUTPUT_SIZE, model)  # layers, in-dim, out-dim, model
    bwdRNN = dy.CoupledLSTMBuilder(2, WORD_EMBEDDING_SIZE, HIDDEN_OUTPUT_SIZE, model)

    # char-level LSTMs
    cFwdRNN = dy.CoupledLSTMBuilder(2, CHAR_EMBEDDING_SIZE, WORD_EMBEDDING_SIZE / 2, model)
    cBwdRNN = dy.CoupledLSTMBuilder(2, CHAR_EMBEDDING_SIZE, WORD_EMBEDDING_SIZE / 2, model)

    num_tagged = cum_loss = 0
    sample_iter_count = best_sample_iter_count = 0
    for ITER in xrange(MAX_EPOCHS):
        random.shuffle(train)
        for i, s in enumerate(train, 1):
            sample_iter_count += 1
            best_sample_iter_count += 1
            if i > 0 and i % (DEVSET_EVAL_INTERVAL / 2) == 0:
                # print status
                #trainer.status()
                print >> sys.stderr, 'AVERAGE LOSS: %.4f' % (cum_loss / num_tagged)
                cum_loss = num_tagged = 0
            if i % DEVSET_EVAL_INTERVAL == 0 or i == len(train) - 1:
                # eval on dev
                dev_system = tag_dataset(dev, 'DEV')
                dev_labelset_eval_dict = eval_dataset(dev_system, dev, 'DEV')
                dev_labelset_eval_dict['ITERATION'] = i
                if dev_labelset_eval_dict['F'] > MINIMUM_DEV_F1_SCORE and dev_labelset_eval_dict['F'] > BEST_DEV_F1_SCORE:
                    BEST_DEV_F1_SCORE = dev_labelset_eval_dict['F']
                    PATIENCE = 5
                    output_dataset(dev_system, filename=options.model_identifier + '__' + 'devset' + '.tsv', meta=dev_labelset_eval_dict)
                    system_test1 = tag_dataset(test1, 'TEST')
                    system_test2 = tag_dataset(test2, 'TEST')
                    output_dataset(system_test1, filename=options.model_identifier + '__' + 'testset1_' + '.tsv', meta=dev_labelset_eval_dict)
                    output_dataset(system_test2, filename=options.model_identifier + '__' + 'testset2_' + '.tsv', meta=dev_labelset_eval_dict)
                if BEST_DEV_F1_SCORE > 0.0:
                    if PATIENCE > 0:
                        PATIENCE -= 1
                    else:
                        exit(0)
            # train on sent
            words = [w for w, t in s]
            golds = [t for w, t in s]
            loss_exp = sent_loss(words, golds)
            cum_loss += loss_exp.scalar_value()
            num_tagged += len(golds)
            loss_exp.backward()
            trainer.update()
        print >> sys.stderr, "epoch %r finished" % ITER
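# --- Hedged usage sketch (not part of the original snippet above) ---
# The training loop above calls sent_loss(), which is not shown here. A common
# way to build the word representation such a loss would need (following the
# standard DyNet biLSTM-tagger recipe) is to embed frequent words directly and
# back off to the character LSTMs for rare ones. The helper name `word_rep` and
# the frequency threshold of 5 are assumptions added for illustration.
def word_rep(w):
    if wc[w] > 5:
        return WORDS_LOOKUP[vw.w2i.get(w, UNK)]
    # run the character biLSTM over <*> w <*> and concatenate the final states
    char_ids = [vc.w2i["<*>"]] + [vc.w2i[c] for c in w if c in vc.w2i] + [vc.w2i["<*>"]]
    char_embs = [CHARS_LOOKUP[cid] for cid in char_ids]
    fw = cFwdRNN.initial_state().transduce(char_embs)
    bw = cBwdRNN.initial_state().transduce(char_embs[::-1])
    return dy.concatenate([fw[-1], bw[-1]])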
def __init__(self, characters_vocab, tag_vocab, LSTM_NUM_OF_LAYERS=1, EMBEDDINGS_SIZE=32,
             STATE_SIZE=100, ATTENTION_SIZE=100, MINIBATCH_SIZE=1, COPY_WEIGHT=0.8,
             DROPOUT_PROB=0.2, EOS="<EOS>", NULL="<NULL>", MAX_PREDICTION_LEN_DEF=20,
             LENGTH_NORM_WEIGHT=0.1, USE_ATT_REG=False, USE_TAG_ATT_REG=False,
             PREDICT_LANG=False):
    self.model = dy.Model()
    self.characters = characters_vocab
    self.tags = tag_vocab

    self.int2char = list(self.characters)
    self.char2int = {c: i for i, c in enumerate(self.characters)}
    self.int2tag = list(self.tags)
    self.tag2int = {c: i for i, c in enumerate(self.tags)}

    self.VOCAB_SIZE = len(self.characters)
    self.TAG_VOCAB_SIZE = len(self.tags)

    self.LSTM_NUM_OF_LAYERS = LSTM_NUM_OF_LAYERS
    self.EMBEDDINGS_SIZE = EMBEDDINGS_SIZE
    self.STATE_SIZE = STATE_SIZE
    self.ATTENTION_SIZE = ATTENTION_SIZE
    self.MINIBATCH_SIZE = MINIBATCH_SIZE
    self.COPY_WEIGHT = COPY_WEIGHT
    self.DROPOUT_PROB = DROPOUT_PROB
    self.MAX_PREDICTION_LEN_DEF = MAX_PREDICTION_LEN_DEF
    self.LENGTH_NORM_WEIGHT = LENGTH_NORM_WEIGHT
    self.USE_ATT_REG = USE_ATT_REG
    self.USE_TAG_ATT_REG = USE_TAG_ATT_REG
    self.PREDICT_LANG = PREDICT_LANG
    self.EOS = EOS
    self.NULL = NULL

    self.enc_fwd_lstm = dy.CoupledLSTMBuilder(self.LSTM_NUM_OF_LAYERS, self.EMBEDDINGS_SIZE,
                                              self.STATE_SIZE, self.model)
    self.enc_bwd_lstm = dy.CoupledLSTMBuilder(self.LSTM_NUM_OF_LAYERS, self.EMBEDDINGS_SIZE,
                                              self.STATE_SIZE, self.model)
    self.dec_lstm = dy.CoupledLSTMBuilder(
        self.LSTM_NUM_OF_LAYERS, self.STATE_SIZE * 3 + self.EMBEDDINGS_SIZE,
        self.STATE_SIZE, self.model)

    self.input_lookup = self.model.add_lookup_parameters(
        (self.VOCAB_SIZE, self.EMBEDDINGS_SIZE))
    self.tag_input_lookup = self.model.add_lookup_parameters(
        (self.TAG_VOCAB_SIZE, self.EMBEDDINGS_SIZE))

    self.attention_w1 = self.model.add_parameters(
        (self.ATTENTION_SIZE, self.STATE_SIZE * 2))
    self.attention_w2 = self.model.add_parameters(
        (self.ATTENTION_SIZE, self.STATE_SIZE * self.LSTM_NUM_OF_LAYERS * 2))
    self.attention_w3 = self.model.add_parameters((self.ATTENTION_SIZE, 5))
    self.attention_v = self.model.add_parameters((1, self.ATTENTION_SIZE))

    self.decoder_w = self.model.add_parameters(
        (self.VOCAB_SIZE, self.STATE_SIZE))
    self.decoder_b = self.model.add_parameters((self.VOCAB_SIZE))
    # output_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMBEDDINGS_SIZE))
    self.output_lookup = self.input_lookup

    self.enc_tag_lstm = dy.CoupledLSTMBuilder(self.LSTM_NUM_OF_LAYERS, self.EMBEDDINGS_SIZE,
                                              self.STATE_SIZE, self.model)
    self.tag_attention_w1 = self.model.add_parameters(
        (self.ATTENTION_SIZE, self.STATE_SIZE))
    self.tag_attention_w2 = self.model.add_parameters(
        (self.ATTENTION_SIZE, self.STATE_SIZE * self.LSTM_NUM_OF_LAYERS * 2))
    self.tag_attention_v = self.model.add_parameters(
        (1, self.ATTENTION_SIZE))
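# --- Hedged usage sketch (not part of the original snippet above) ---
# One way the character-attention parameters above are typically used
# (following the standard DyNet attention recipe): attention_w1 pre-multiplies
# the encoder matrix once per input, attention_w2 projects the decoder state
# (for a CoupledLSTMBuilder, dec_state.s() yields cell and hidden vectors), and
# attention_v scores each source position. The method name `attend` and its
# arguments are assumptions; attention_w3 and the tag-attention parameters are
# not covered by this sketch.
def attend(self, input_mat, dec_state, w1dt):
    w2 = dy.parameter(self.attention_w2)
    v = dy.parameter(self.attention_v)
    # project the concatenated decoder state and score each encoder column
    w2dt = w2 * dy.concatenate(list(dec_state.s()))
    scores = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(scores)
    # weighted sum of encoder columns = context vector
    return input_mat * att_weights, att_weights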