def initialize_graph(self):
    assert self.h_layers > 0
    for i in range(self.h_layers):
        layer = Layer(self.model,
                      in_dim=self.vocab_size if i == 0 else self.h_dim,
                      output_dim=self.h_dim,
                      activation=self.activation)
        self.layers.append(layer)
    self.output_layers_dict["F0"] = Layer(
        self.model, self.h_dim, 2, activation=dynet.softmax,
        mlp=self.h_dim if self.add_hidden else 0)
    self.output_layers_dict["F1"] = Layer(
        self.model, self.h_dim, 2, activation=dynet.softmax,
        mlp=self.h_dim if self.add_hidden else 0)
    self.output_layers_dict["Ft"] = Layer(
        self.model, self.h_dim, 2, activation=dynet.softmax,
        mlp=self.h_dim if self.add_hidden else 0)
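# hedged sketch (not from the original code): how the shared stack and one
# output head might compose in a forward pass. Assumes the repo's Layer
# instances are callable on a DyNet expression; `forward` and `head` are
# illustrative names.
def forward(self, x, head="F0"):
    for layer in self.layers:                  # shared hidden stack
        x = layer(x)                           # assumed: Layer.__call__ applies activation(W*x + b)
    return self.output_layers_dict[head](x)    # softmax over the 2 output classes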
def add_adversarial_loss(self, num_domains=2):
    # TODO: try different hidden dimensions, e.g. half the dimension
    self.adv_layer = Layer(self.model, self.h_dim, num_domains,
                           activation=dynet.softmax,
                           mlp=self.h_dim if self.add_hidden else 0)
def add_adversarial_loss(self, num_domains=2):
    if not self.adversarial_domains:  # make sure they are set here at the latest
        self.adversarial_domains = num_domains
    self.adv_layer = Layer(self.model, 2 * self.h_dim, num_domains,
                           activation=dynet.softmax,
                           mlp=self.h_dim if self.add_hidden else 0)
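# hedged sketch (not from the original code): applying self.adv_layer to a
# concatenated forward/backward LSTM state, which matches its 2 * h_dim input.
# h_fwd, h_bwd and domain_id are illustrative names; in an adversarial setup
# the gradient below this layer is additionally reversed or scaled.
h = dynet.concatenate([h_fwd, h_bwd])              # 2 * h_dim
domain_dist = self.adv_layer(h)                    # softmax over num_domains
adv_loss = -dynet.log(dynet.pick(domain_dist, domain_id))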
def initialize_graph(self):
    assert self.h_layers > 0
    for i in range(self.h_layers):
        layer = Layer(self.model,
                      in_dim=self.vocab_size if i == 0 else self.h_dim,
                      output_dim=2 if i == self.h_layers - 1 else self.h_dim,
                      activation=dynet.softmax if i == self.h_layers - 1 else self.activation)
        self.layers.append(layer)
def build_computation_graph(self, num_words, num_chars):
    """ build graph and link to parameters """
    # initialize the word embeddings and the parameters
    cembeds = None
    if self.embeds_file:
        print("loading embeddings", file=sys.stderr)
        embeddings, emb_dim = load_embeddings_file(self.embeds_file)
        assert emb_dim == self.in_dim
        # initialize all words with embeddings
        num_words = len(set(embeddings.keys()).union(set(self.w2i.keys())))
        # init model parameters and initialize them
        wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim), init=dynet.ConstInitializer(0.01))
        if self.c_in_dim > 0:
            cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim), init=dynet.ConstInitializer(0.01))
        init = 0
        for word in embeddings:
            # for words already in w2i, update the vector; otherwise add the
            # word to w2i first (since we keep data as integers)
            if word not in self.w2i:
                self.w2i[word] = len(self.w2i.keys())  # add new word
            wembeds.init_row(self.w2i[word], embeddings[word])
            init += 1
        print("initialized: {}".format(init), file=sys.stderr)
    else:
        wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim), init=dynet.ConstInitializer(0.01))
        if self.c_in_dim > 0:
            cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim), init=dynet.ConstInitializer(0.01))

    # make it more flexible to add the number of layers as specified by parameter
    layers = []  # inner layers
    for layer_num in range(self.h_layers):
        if layer_num == 0:
            # in_dim: size of each layer
            input_dim = self.in_dim + self.c_in_dim * 2 if self.c_in_dim > 0 else self.in_dim
            f_builder = dynet.CoupledLSTMBuilder(1, input_dim, self.h_dim, self.model)
            b_builder = dynet.CoupledLSTMBuilder(1, input_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))  # returns forward and backward sequence
        else:  # add inner layers (if h_layers > 1)
            f_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            b_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))

    # store at which layer to predict the task
    task_num_labels = len(self.tag2idx)
    output_layer = FFSequencePredictor(
        Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax))

    if self.c_in_dim > 0:
        char_rnn = BiRNNSequencePredictor(
            dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model),
            dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model))
    else:
        char_rnn = None

    predictors = {}
    predictors["inner"] = layers
    predictors["output_layers_dict"] = output_layer
    predictors["task_expected_at"] = self.h_layers
    return predictors, char_rnn, wembeds, cembeds
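# minimal call-site sketch: the caller keeps all four returned pieces
# (the attribute names follow the docstring of a later variant below)
self.predictors, self.char_rnn, self.wembeds, self.cembeds = \
    self.build_computation_graph(num_words, num_chars)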
def build_computation_graph(self, num_words, num_chars):
    """ build graph and link to parameters """
    # initialize the word embeddings and the parameters
    if self.embeds_file:
        print("loading embeddings", file=sys.stderr)
        embeddings, emb_dim = load_embeddings_file(self.embeds_file, lower=self.lower)
        assert emb_dim == self.in_dim
        # initialize all words with embeddings
        num_words = len(set(embeddings.keys()).union(set(self.w2i.keys())))
        # init model parameters and initialize them
        wembeds = self.model.add_lookup_parameters((num_words, self.in_dim))
        cembeds = self.model.add_lookup_parameters((num_chars, self.c_in_dim))
        init = 0
        for word in embeddings:
            # for words already in w2i, update the vector; otherwise add the
            # word to w2i first (since we keep data as integers)
            if word not in self.w2i:
                self.w2i[word] = len(self.w2i.keys())  # add new word
            wembeds.init_row(self.w2i[word], embeddings[word])
            init += 1
        print("initialized: {}".format(init), file=sys.stderr)
    else:
        wembeds = self.model.add_lookup_parameters((num_words, self.in_dim))
        cembeds = self.model.add_lookup_parameters((num_chars, self.c_in_dim))

    # make it more flexible to add the number of layers as specified by parameter
    layers = []               # inner layers
    output_layers_dict = {}   # from task_id to actual softmax predictor
    task_expected_at = {}     # map task_id => output_layer_#

    # connect output layers to tasks
    for output_layer, task_id in zip(self.pred_layer, self.tasks_ids):
        if output_layer > self.h_layers:
            raise ValueError("cannot have a task at a layer which is beyond the model, increase h_layers")
        task_expected_at[task_id] = output_layer
    print("task expected at", task_expected_at, file=sys.stderr)
    print("h_layers:", self.h_layers, file=sys.stderr)

    for layer_num in range(self.h_layers):
        print(">>> layer_num", layer_num, file=sys.stderr)
        if layer_num == 0:
            # in_dim: size of each layer
            builder = dynet.LSTMBuilder(1, self.in_dim + self.c_in_dim * 2, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(builder))  # returns forward and backward sequence
        else:  # add inner layers (if h_layers > 1)
            builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(builder))

    # store at which layer to predict each task
    for task_id in self.tasks_ids:
        task_num_labels = len(self.task2tag2idx[task_id])
        output_layers_dict[task_id] = FFSequencePredictor(
            Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax))
    print("# output layers:", len(output_layers_dict), file=sys.stderr)

    char_rnn = RNNSequencePredictor(dynet.LSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model))

    predictors = {}
    predictors["inner"] = layers
    predictors["output_layers_dict"] = output_layers_dict
    predictors["task_expected_at"] = task_expected_at
    return predictors, char_rnn, wembeds, cembeds
if layer_num == 0:
    f_builder = dy.LSTMBuilder(1, args.in_dim, args.h_dim, model)
    b_builder = dy.LSTMBuilder(1, args.in_dim, args.h_dim, model)
    layers.append(BiRNNSequencePredictor(f_builder, b_builder))
else:
    f_builder = dy.LSTMBuilder(1, args.h_dim, args.h_dim, model)
    b_builder = dy.LSTMBuilder(1, args.h_dim, args.h_dim, model)
    layers.append(BiRNNSequencePredictor(f_builder, b_builder))

predictors = {}
predictors["inner"] = layers
predictors["outer"] = {}
for task_id in tasks_ids:
    task_num_labels = len(task2t2i[task_id])
    predictors["outer"][task_id] = FFSequencePredictor(
        Layer(model, args.h_dim * 2, task_num_labels, dy.softmax))

# TRAINING
train_data = list(zip(train_X, train_Y, task_labels))
print("%d training instances..." % len(train_data))

for iteration in range(args.iters):
    print("Iteration: " + str(iteration))
    total_loss = 0.0
    total_tagged = 0.0
    random.shuffle(train_data)
    j = 0
    for (word_indices, y, task_of_instance) in train_data:
        j += 1
        if j % 200 == 0:
            print('\t' + str(j) + " instances...")
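        # hedged sketch of the per-instance step that typically follows here;
        # calc_loss and trainer are assumed names, not identifiers from this snippet
        dy.renew_cg()                                        # fresh graph per instance
        loss = calc_loss(word_indices, y, task_of_instance)  # e.g. via predictors["outer"][task_of_instance]
        total_loss += loss.value()
        total_tagged += len(word_indices)
        loss.backward()
        trainer.update()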
def build_computation_graph(self, num_words, num_chars):
    """build graph and link to parameters; sets self.predictors,
    self.char_rnn, self.wembeds and self.cembeds"""
    ## initialize word embeddings
    if self.embeds_file:
        print("loading embeddings")
        embeddings, emb_dim = load_embeddings_file(self.embeds_file)
        assert emb_dim == self.in_dim
        # initialize all words with embeddings
        num_words = len(set(embeddings.keys()).union(set(self.w2i.keys())))
        # init model parameters and initialize them
        self.wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim), init=self.initializer)
        init = 0
        for word in embeddings:
            if word not in self.w2i:
                self.w2i[word] = len(self.w2i.keys())  # add new word
            self.wembeds.init_row(self.w2i[word], embeddings[word])
            init += 1
        print("initialized: {}".format(init))
        del embeddings  # clean up
    else:
        self.wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim), init=self.initializer)

    ## initialize character embeddings
    self.cembeds = None
    if self.c_in_dim > 0:
        self.cembeds = self.model.add_lookup_parameters(
            (num_chars, self.c_in_dim), init=self.initializer)
    if self.lex_dim > 0 and self.embed_lex:
        # +1 for the UNK property
        self.lembeds = self.model.add_lookup_parameters(
            (len(self.dictionary_values) + 1, self.lex_dim),
            init=dynet.GlorotInitializer())

    # make it more flexible to add the number of layers as specified by parameter
    layers = []               # inner layers
    output_layers_dict = {}   # from task_id to actual softmax predictor
    for layer_num in range(self.h_layers):
        if layer_num == 0:
            if self.c_in_dim > 0:
                # in_dim: size of each layer
                if self.lex_dim > 0 and self.embed_lex:
                    lex_embed_size = self.lex_dim * len(self.dictionary_values)
                else:
                    lex_embed_size = self.lex_dim
                f_builder = self.builder(1, self.in_dim + self.c_h_dim * 2 + lex_embed_size,
                                         self.h_dim, self.model)
                b_builder = self.builder(1, self.in_dim + self.c_h_dim * 2 + lex_embed_size,
                                         self.h_dim, self.model)
            else:
                f_builder = self.builder(1, self.in_dim + self.lex_dim, self.h_dim, self.model)
                b_builder = self.builder(1, self.in_dim + self.lex_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))  # returns forward and backward sequence
        else:  # add inner layers (if h_layers > 1)
            f_builder = self.builder(1, self.h_dim, self.h_dim, self.model)
            b_builder = self.builder(1, self.h_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))

    # store at which layer to predict each task
    task2layer = {task_id: out_layer for task_id, out_layer
                  in zip(self.task2tag2idx, self.pred_layer)}
    if len(task2layer) > 1:
        print("task2layer", task2layer)
    for task_id in task2layer:
        task_num_labels = len(self.task2tag2idx[task_id])
        if not self.crf:
            output_layers_dict[task_id] = FFSequencePredictor(
                self.task2tag2idx[task_id],
                Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax,
                      mlp=self.mlp, mlp_activation=self.activation_mlp))
        else:
            print("CRF")
            output_layers_dict[task_id] = CRFSequencePredictor(
                self.model, task_num_labels, self.task2tag2idx[task_id],
                Layer(self.model, self.h_dim * 2, task_num_labels, None,
                      mlp=self.mlp, mlp_activation=self.activation_mlp),
                viterbi_loss=self.viterbi_loss)

    self.char_rnn = BiRNNSequencePredictor(
        self.builder(1, self.c_in_dim, self.c_h_dim, self.model),
        self.builder(1, self.c_in_dim, self.c_h_dim, self.model))

    self.predictors = {}
    self.predictors["inner"] = layers
    self.predictors["output_layers_dict"] = output_layers_dict
    self.predictors["task_expected_at"] = task2layer
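# hedged sketch (not from the original code): how task_expected_at / task2layer
# is typically consumed at prediction time. Assumes predict_sequence(f, b)
# returns forward and backward state sequences, as the comments above suggest;
# predict_for_task and features are illustrative names.
def predict_for_task(self, features, task_id):
    prev_f, prev_b = features, features
    for depth, bilstm in enumerate(self.predictors["inner"], start=1):
        f, b = bilstm.predict_sequence(prev_f, prev_b)
        if depth == self.predictors["task_expected_at"][task_id]:
            concat = [dynet.concatenate([fw, bw])
                      for fw, bw in zip(f, reversed(b))]
            return self.predictors["output_layers_dict"][task_id].predict_sequence(concat)
        prev_f, prev_b = f, b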
def build_computation_graph(self, num_words, num_chars):
    """ build graph and link to parameters """
    ## initialize word embeddings
    if self.embeds_file:
        print("loading embeddings", file=sys.stderr)
        embeddings, emb_dim = load_embeddings_file(self.embeds_file)
        assert emb_dim == self.in_dim
        # initialize all words with embeddings
        num_words = len(set(embeddings.keys()).union(set(self.w2i.keys())))
        # init model parameters and initialize them
        wembeds = self.model.add_lookup_parameters((num_words, self.in_dim),
                                                   init=self.initializer)
        init = 0
        for word in embeddings:
            if word not in self.w2i:
                self.w2i[word] = len(self.w2i.keys())  # add new word
            wembeds.init_row(self.w2i[word], embeddings[word])
            init += 1
        print("initialized: {}".format(init), file=sys.stderr)
    else:
        wembeds = self.model.add_lookup_parameters((num_words, self.in_dim),
                                                   init=self.initializer)

    ## initialize character embeddings
    cembeds = None
    if self.c_in_dim > 0:
        cembeds = self.model.add_lookup_parameters((num_chars, self.c_in_dim),
                                                   init=self.initializer)

    # make it more flexible to add the number of layers as specified by parameter
    layers = []               # inner layers
    output_layers_dict = {}   # from task_id to actual softmax predictor
    task_expected_at = {}     # map task_id => output_layer_#

    # connect output layers to tasks
    for output_layer, task_id in zip(self.pred_layer, self.tasks_ids):
        if output_layer > self.h_layers:
            raise ValueError("cannot have a task at a layer (%d) which is "
                             "beyond the model, increase h_layers (%d)"
                             % (output_layer, self.h_layers))
        task_expected_at[task_id] = output_layer

    for layer_num in range(self.h_layers):
        if layer_num == 0:
            # in_dim: size of each layer
            input_dim = self.in_dim + self.c_in_dim * 2 if self.c_in_dim > 0 else self.in_dim
            f_builder = self.builder(1, input_dim, self.h_dim, self.model)
            b_builder = self.builder(1, input_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))  # returns forward and backward sequence
        else:  # add inner layers (if h_layers > 1)
            f_builder = self.builder(1, self.h_dim, self.h_dim, self.model)
            b_builder = self.builder(1, self.h_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))

    # store at which layer to predict each task
    for task_id in self.tasks_ids:
        task_num_labels = len(self.task2tag2idx[task_id])
        output_layers_dict[task_id] = FFSequencePredictor(
            Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax,
                  mlp=self.mlp, mlp_activation=self.activation_mlp))

    if self.c_in_dim > 0:
        char_rnn = BiRNNSequencePredictor(
            self.builder(1, self.c_in_dim, self.c_in_dim, self.model),
            self.builder(1, self.c_in_dim, self.c_in_dim, self.model))
    else:
        char_rnn = None

    predictors = {}
    predictors["inner"] = layers
    predictors["output_layers_dict"] = output_layers_dict
    predictors["task_expected_at"] = task_expected_at
    return predictors, char_rnn, wembeds, cembeds
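# hedged sketch (not from the original code): combining the returned embeddings
# and char_rnn into per-token input features for the first biLSTM layer.
# word_ids and char_id_seqs are assumed names, and the predict_sequence call
# plus the use of the last fwd/bwd character states are assumptions.
features = []
for wid, char_ids in zip(word_ids, char_id_seqs):
    parts = [wembeds[wid]]
    if char_rnn is not None:
        char_vecs = [cembeds[c] for c in char_ids]
        f_states, b_states = char_rnn.predict_sequence(char_vecs, char_vecs)
        parts += [f_states[-1], b_states[-1]]   # c_in_dim * 2 extra dims
    features.append(dynet.concatenate(parts))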
def initialize_graph(self, num_words=None, num_chars=None):
    """build graph and link to parameters; creates all three output
    layers (F0, F1 and Ft), even if not all of them are used"""
    num_words = num_words if num_words is not None else len(self.w2i)
    num_chars = num_chars if num_chars is not None else len(self.c2i)
    if num_words == 0 or num_chars == 0:
        raise ValueError('Word2id and char2id have to be loaded before '
                         'initializing the graph.')
    print('Initializing the graph...')

    # initialize the word embeddings and the parameters
    self.cembeds = None
    if self.embeds_file:
        print("loading embeddings", file=sys.stderr)
        embeddings, emb_dim = load_embeddings_file(self.embeds_file)
        assert emb_dim == self.in_dim
        # initialize all words with embeddings
        num_words = len(set(embeddings.keys()).union(set(self.w2i.keys())))
        # init model parameters and initialize them
        self.wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim), init=dynet.ConstInitializer(0.01))
        if self.c_in_dim > 0:
            self.cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim), init=dynet.ConstInitializer(0.01))
        init = 0
        for word in embeddings:
            # for words already in w2i, update the vector; otherwise add the
            # word to w2i first (since we keep data as integers)
            if word not in self.w2i:
                self.w2i[word] = len(self.w2i.keys())  # add new word
            self.wembeds.init_row(self.w2i[word], embeddings[word])
            init += 1
        print("initialized: {}".format(init), file=sys.stderr)
    else:
        self.wembeds = self.model.add_lookup_parameters(
            (num_words, self.in_dim), init=dynet.ConstInitializer(0.01))
        if self.c_in_dim > 0:
            self.cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim), init=dynet.ConstInitializer(0.01))

    # make it more flexible to add the number of layers as specified by parameter
    layers = []  # inner layers
    for layer_num in range(self.h_layers):
        if layer_num == 0:
            # in_dim: size of each layer
            input_dim = self.in_dim + self.c_in_dim * 2 if self.c_in_dim > 0 else self.in_dim
            f_builder = dynet.CoupledLSTMBuilder(1, input_dim, self.h_dim, self.model)
            b_builder = dynet.CoupledLSTMBuilder(1, input_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))  # returns forward and backward sequence
        else:  # add inner layers (if h_layers > 1)
            f_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            b_builder = dynet.LSTMBuilder(1, self.h_dim, self.h_dim, self.model)
            layers.append(BiRNNSequencePredictor(f_builder, b_builder))

    # store at which layer to predict the task
    task_num_labels = len(self.tag2idx)
    output_layers_dict = {}
    # for simplicity always add the additional outputs, even if they are then not used
    output_layers_dict["F0"] = FFSequencePredictor(
        Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax,
              mlp=self.h_dim if self.add_hidden else 0))
    output_layers_dict["F1"] = FFSequencePredictor(
        Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax,
              mlp=self.h_dim if self.add_hidden else 0))
    output_layers_dict["Ft"] = FFSequencePredictor(
        Layer(self.model, self.h_dim * 2, task_num_labels, dynet.softmax,
              mlp=self.h_dim if self.add_hidden else 0))

    if self.c_in_dim > 0:
        self.char_rnn = BiRNNSequencePredictor(
            dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model),
            dynet.CoupledLSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model))
    else:
        self.char_rnn = None

    self.predictors = dict()
    self.predictors["inner"] = layers
    self.predictors["output_layers_dict"] = output_layers_dict
    self.predictors["task_expected_at"] = self.h_layers
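# hedged sketch: picking one of the three heads by name at prediction time;
# concat_states (the concatenated fwd/bwd biLSTM outputs) is an assumed name
output_predictor = self.predictors["output_layers_dict"]["Ft"]  # or "F0" / "F1"
label_distributions = output_predictor.predict_sequence(concat_states)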
def output_generator(model, in_dim, out_dim, query):
    # query: spec consumed by get_layer_params, which yields
    # (mlp_activation, mlp) pairs
    for mlp_activation, mlp in get_layer_params(query):
        yield FFSequencePredictor(
            Layer(model, in_dim * 2, out_dim, dynet.softmax,
                  mlp=int(mlp),
                  mlp_activation=ACTIVATION_MAP[mlp_activation]))
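# hypothetical usage; args.h_dim, task_num_labels and query are assumed names
output_layers = list(output_generator(model, args.h_dim, task_num_labels, query))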