def make_population(self, population_size, input_size, layer_sizes, lr=0.5):
    self.individuals = []
    for i in range(population_size):
        network = NeuralNetwork()
        network.make(input_size, layer_sizes, lr)
        self.individuals.append(Individual(network))
def crossover(self, individual2, fitness):
    fitness1 = fitness(self)
    fitness2 = fitness(individual2)
    vector1 = self.neural_network.get_neuron_vector()
    vector2 = individual2.neural_network.get_neuron_vector()
    layer_sizes = self.neural_network.get_layer_sizes()
    # Choose the split point in proportion to each parent's fitness;
    # max(..., 1) guards against division by zero when both fitnesses are 0
    central_point = int(
        round(fitness1 * len(vector1) / max(fitness1 + fitness2, 1)))
    # Crossover: take the prefix from parent 1 and the suffix from parent 2
    neurons_final = vector1[:central_point] + vector2[central_point:]
    neural_network = NeuralNetwork()
    neural_network.make_from_neuron_vector(neurons_final, layer_sizes)
    return Individual(neural_network)
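# A minimal sketch of how the fitness-proportional split point behaves,
# assuming flat weight vectors of plain floats; the vectors and fitness
# values below are hypothetical, not taken from the project.
vector1 = [1.0] * 10            # parent 1's flattened weights
vector2 = [2.0] * 10            # parent 2's flattened weights
fitness1, fitness2 = 3.0, 1.0   # parent 1 is three times as fit
# The fitter parent contributes proportionally more of the child:
central_point = int(
    round(fitness1 * len(vector1) / max(fitness1 + fitness2, 1)))
child = vector1[:central_point] + vector2[central_point:]
assert central_point == 8
assert child == [1.0] * 8 + [2.0] * 2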
import argparse
import random
from os import walk
from os.path import join

# is_label, remove_label and NeuralNetwork are project-local helpers.

parser = argparse.ArgumentParser()
parser.add_argument('--image_dir',
                    help='Directory containing the training data')
parser.add_argument('--batchsize',
                    help='Number of images per batch', default=1)
parser.add_argument('-e', '--epochs',
                    help='Number of epochs to train', default=1)
args = parser.parse_args()

image_dir = args.image_dir
batch_size = int(args.batchsize)
epochs = int(args.epochs)


def get_image_paths(directory):
    """Collects shuffled (image, label) path pairs under `directory`."""
    paths = []
    for dirpath, dirnames, filenames in walk(directory):
        for filename in filenames:
            if is_label(filename):
                paths.append(
                    (join(dirpath, remove_label(filename)),
                     join(dirpath, filename)))
    random.shuffle(paths)
    return paths


with NeuralNetwork() as nn:
    nn.train(get_image_paths(image_dir), batch_size, epochs)
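# Hypothetical invocation (the script name and data path are placeholders,
# not from the project):
#
#   python train.py --image_dir ./dataset --batchsize 4 -e 10
#
# get_image_paths then yields shuffled (image, label) path pairs for every
# file under ./dataset that is_label() recognises.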
def createNetworkLayout(logger, preprocessor):
    '''
    Returns the network with the specified layout.
    '''
    # Create Neural Network
    network = NeuralNetwork()
    network.createSequentialModel()
    input_shape = (preprocessor.getNetworkData()['input'].shape[1],
                   preprocessor.getNetworkData()['input'].shape[2])
    vokab_length = len(preprocessor.getLabelEncoder().classes_)

    # Add layers
    # units = how many nodes a layer should have
    # input_shape = shape of the data the layer will be trained on
    network.add(LSTM(units=256, input_shape=input_shape,
                     return_sequences=True))
    # rate = fraction of input units to drop during training
    network.add(Dropout(rate=0.3))
    network.add(LSTM(units=512, return_sequences=True))
    network.add(Dropout(rate=0.3))
    network.add(LSTM(units=256))
    network.add(Dense(units=256))
    network.add(Dropout(rate=0.3))
    # The last layer must have as many nodes as there are distinct outputs,
    # which assures that the output of the network maps onto our classes
    network.add(Dense(units=vokab_length))
    network.add(Activation('softmax'))

    logger.info("Compiling model...")
    network.compile(_loss='categorical_crossentropy',
                    _optimizer='rmsprop',
                    _metrics=['acc'])
    logger.info("Finished compiling.")
    logger.info("Model Layers: \n{}".format(network._model.summary()))
    return network
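# A minimal sketch of the shapes this layout expects, assuming the
# preprocessor emits fixed-length sequences (the numbers are hypothetical):
#
#   input_shape = (100, 1)                  # (timesteps, features per step)
#   vokab_length = 42                       # number of distinct labels
#   # the LSTM stack receives batches of shape (batch, 100, 1) and the
#   # softmax layer emits (batch, 42), one probability per class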
class TaggerNN():
    def __init__(self, config):
        """Initialises a new tagger."""
        self.config = config['tagger']
        logging.getLogger().setLevel(logging.INFO)

        # Load embeddings
        self.x_embeddings, self.word_ids, self.tags_embeddings, self.tags, \
            self.word_dict, self.word_dict_inv, self.tag_dict, \
            self.tag_dict_inv, self.sent_lens, _ = load_processed_data(
                config['processed_data_location'])
        self.word_ids_dev, self.tags_dev, self.sent_lens_dev, _ = \
            load_processed_data(config['processed_dev_data_location'], True)
        logging.info('Data loaded!')

        self.classifier = []

    def features(self, words, i, pred_tags):
        # Build a window of five words around position i plus the two
        # previously predicted tags, padding at the sentence boundaries
        tag_pad = '<PAD/>'
        word_pad = '<PAD/>'
        b_w = words[i] if 0 <= i < len(words) else word_pad
        bm1_w = words[i - 1] if 0 <= i - 1 < len(words) else word_pad
        bm2_w = words[i - 2] if 0 <= i - 2 < len(words) else word_pad
        bp1_w = words[i + 1] if 0 <= i + 1 < len(words) else word_pad
        bp2_w = words[i + 2] if 0 <= i + 2 < len(words) else word_pad
        bm1_t = pred_tags[i - 1] if 0 <= i - 1 < len(pred_tags) else tag_pad
        bm2_t = pred_tags[i - 2] if 0 <= i - 2 < len(pred_tags) else tag_pad
        feat_w = [bm2_w, bm1_w, b_w, bp1_w, bp2_w]
        feat_t = [bm2_t, bm1_t]
        # Convert words to word ids; unknown words map to id 0
        feat_w_ids = [
            self.word_dict[w] if w in self.word_dict else 0 for w in feat_w
        ]
        feat_w_ids = np.array(feat_w_ids)
        feat_t_ids = [self.tag_dict[t] for t in feat_t]
        feat_t_ids = np.array(feat_t_ids)
        feat = np.concatenate((feat_w_ids, feat_t_ids), axis=0)
        feat = np.reshape(feat, (1, -1))
        return feat

    def tag(self, words):
        pred_tags = []
        for i in range(len(words)):
            feat = self.features(words, i, pred_tags)
            tag = self.classifier.predict(feat)
            tag = self.tag_dict_inv[str(tag)]
            pred_tags.append(tag)
        return pred_tags

    def train(self, train=True):
        """Trains the tagger on the loaded training data.

        Args:
            train: If False, restore a previously trained model instead of
                training a new one.
        """
        # Find pad values
        tag_pad = self.tag_dict['<PAD/>']
        word_pad = self.word_dict['<PAD/>']

        self.classifier = NeuralNetwork(self.config, self.x_embeddings,
                                        self.tags_embeddings,
                                        len(self.tag_dict_inv),
                                        feature_type='tagger_1')
        if not train:
            self.classifier.restore_sess()
        else:
            logging.info('Training NN for tagger!')
            # Generate all configurations for training
            x_tagger, y_tagger = generate_data_for_tagger(
                self.word_ids, self.tags, self.sent_lens, tag_pad, word_pad)
            x_dev_tagger, y_dev_tagger = generate_data_for_tagger(
                self.word_ids_dev, self.tags_dev, self.sent_lens_dev,
                tag_pad, word_pad)
            self.classifier.train(x_tagger, y_tagger, x_dev_tagger,
                                  y_dev_tagger)
            logging.info('Tagger training done!')
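# A minimal sketch of the tagger's feature window, assuming the toy
# sentence ["the", "cat", "sat"] with "DET" already predicted for "the"
# (both hypothetical): at position i = 1 the word window is
# [<PAD/>, the, cat, sat, <PAD/>] and the tag window is [<PAD/>, DET],
# so the classifier sees the 7 ids
# [bm2_w, bm1_w, b_w, bp1_w, bp2_w, bm2_t, bm1_t].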
def createNetworkLayout(logger, preprocessor, layout, loss, optimizer,
                        activation, metrics, weightsPath=None, dropout=0.3,
                        callbacks=[]):
    '''
    Creates the network layout. Validates the weightsPath so you don't
    have to take care of that. Returns the network with the specified
    layout.
    '''
    # Check for correctness and create the folder if it is missing
    if weightsPath is not None:
        weightsPath = validateFolderPath(weightsPath, logger)

    # Create Neural Network
    network = NeuralNetwork()
    network.createSequentialModel()
    input_shape = (preprocessor.getNetworkData()['input'].shape[1],
                   preprocessor.getNetworkData()['input'].shape[2])
    vokab_length = len(preprocessor.getLabelEncoder().classes_)

    # Add layers
    # units = how many nodes a layer should have
    # input_shape = shape of the data the layer will be trained on
    if layout == 'default':
        network = defaultLayout(network, input_shape, dropout)
    elif layout == 'multi':
        network = multiLSTMLayout(network, input_shape, dropout)
    elif layout == 'bidirectional':
        network = bidirectionalLayout(network, input_shape, dropout)
    elif layout == 'multibidirectional':
        network = multibidirectionalLayout(network, input_shape, dropout)
    #elif layout == 'attention':
    #    network = attentionLayout(network, input_shape)

    # The last layers are the same for every layout: the final Dense layer
    # has as many nodes as there are distinct outputs, which assures that
    # the output of the network maps onto our classes
    network.add(Dense(units=vokab_length))
    network.add(Activation(activation))

    # Compile network
    logger.info("Compiling model...")
    network.compile(_loss=loss, _path=weightsPath, _optimizer=optimizer,
                    _metrics=metrics, _callbacks=callbacks)
    logger.info("Finished compiling.")
    #logger.info("Model Layers: \n{}".format(network._model.summary()))
    return network
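# Hypothetical usage, assuming a configured logger and preprocessor
# (argument values are examples, not project defaults):
#
#   network = createNetworkLayout(logger, preprocessor,
#                                 layout='bidirectional',
#                                 loss='categorical_crossentropy',
#                                 optimizer='rmsprop',
#                                 activation='softmax',
#                                 metrics=['acc'],
#                                 weightsPath='weights/',
#                                 dropout=0.3)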
class ParserNN():
    """A transition-based dependency parser.

    This parser implements the arc-standard algorithm for dependency
    parsing. When presented with an input sentence, it first tags the
    sentence for parts of speech, and then uses a multi-layer neural
    network classifier to predict a sequence of *moves* (transitions)
    that construct a dependency tree for the input sentence. Moves are
    encoded as integers as follows:

        SHIFT = 0, LEFT-ARC = 1, RIGHT-ARC = 2

    At any given point in the predicted sequence, the state of the parser
    can be specified by: the index of the first word in the input sentence
    that the parser has not yet started to process; a stack holding the
    indices of those words that are currently being processed; and a
    partial dependency tree, represented as a list of indices such that
    `tree[i]` gives the index of the head (parent node) of the word at
    position `i`, or 0 in case the corresponding word has not yet been
    assigned a head.

    Attributes:
        tagger: A part-of-speech tagger.
        classifier: A multi-layer neural net classifier used to predict
            the next move of the parser.
    """

    def __init__(self, config, tagger):
        """Initialises a new parser."""
        self.config = config['parser']
        logging.getLogger().setLevel(logging.INFO)

        # Load embeddings
        self.x_embeddings, self.word_ids, self.tags_embeddings, self.tags, \
            self.word_dict, self.word_dict_inv, self.tag_dict, \
            self.tag_dict_inv, self.sent_lens, self.gold_tree = \
            load_processed_data(config['processed_data_location'])
        self.word_ids_dev, self.tags_dev, self.sent_lens_dev, \
            self.gold_tree_dev = load_processed_data(
                config['processed_dev_data_location'], True)
        logging.info('Data loaded!')

        self.tagger = tagger
        self.classifier = []

    def parse(self, words):
        """Parses a sentence.

        Args:
            words: The input sentence, a list of words.

        Returns:
            A pair consisting of the predicted tags and the predicted
            dependency tree for the input sentence.
        """
        tags = self.tagger.tag(words)
        word_pad_id = self.word_dict['<PAD/>']
        tag_pad_id = self.tag_dict['<PAD/>']

        # Convert words to ids; unknown words map to id 0
        words = [self.word_dict[w] if w in self.word_dict else 0
                 for w in words]
        # Convert tags to ids
        tags_ids = [self.tag_dict[t] for t in tags]

        # Parse
        i = 0
        stack = []
        pred_tree = [0] * len(words)
        while True:
            valid_moves = self.valid_moves(i, stack, pred_tree, words)
            if not valid_moves:
                break
            feat = self.features(words, tags_ids, range(i, len(words)),
                                 stack, pred_tree, tag_pad_id, word_pad_id)
            feat = np.array(feat)
            feat = np.reshape(feat, (1, -1))
            valid_pred = np.zeros(3, dtype=bool)
            valid_pred[valid_moves] = True
            move_to_do = self.classifier.predict(feat, valid_pred=valid_pred)
            (i, stack, pred_tree) = self.move(i, stack, pred_tree,
                                              move_to_do)
        return (tags, pred_tree)

    def valid_moves(self, i, stack, pred_tree, words):
        """Returns the valid moves for the specified parser configuration.

        Args:
            i: The index of the first unprocessed word.
            stack: The stack of words (represented by their indices) that
                are currently being processed.
            pred_tree: The partial dependency tree.

        Returns:
            The list of valid moves for the specified parser configuration.
        """
        valid = []
        if i < len(words):
            valid.append(0)
        if len(stack) >= 3:
            valid.append(1)
            valid.append(2)
        return valid

    def move(self, i, stack, pred_tree, move):
        """Executes a single move.

        Args:
            i: The index of the first unprocessed word.
            stack: The stack of words (represented by their indices) that
                are currently being processed.
            pred_tree: The partial dependency tree.
            move: The move that the parser should make.
        Returns:
            The new parser configuration, represented as a triple
            containing the index of the new first unprocessed word, the
            stack, and the partial dependency tree.
        """
        if move == 0:
            stack.append(i)
            i += 1
        elif move == 1:
            pred_tree[stack[-2]] = stack[-1]
            stack.remove(stack[-2])
        elif move == 2:
            pred_tree[stack[-1]] = stack[-2]
            stack.remove(stack[-1])
        return (i, stack, pred_tree)

    def features(self, words, tags, buffer, stack, parse, tag_pad,
                 word_pad):
        """Extracts features for the specified parser configuration.

        Args:
            words: The input sentence, a list of word ids.
            tags: The list of tag ids for the input sentence.
            buffer: The indices of the words not yet processed.
            stack: The stack of words (represented by their indices) that
                are currently being processed.
            parse: The partial dependency tree.

        Returns:
            A feature vector for the specified configuration.
        """
        # Single-word features: the first three buffer items and the top
        # three stack items, as words and as tags
        b1_w = words[buffer[0]] if buffer else word_pad
        b1_t = tags[buffer[0]] if buffer else tag_pad
        b2_w = words[buffer[1]] if len(buffer) > 1 else word_pad
        b2_t = tags[buffer[1]] if len(buffer) > 1 else tag_pad
        b3_w = words[buffer[2]] if len(buffer) > 2 else word_pad
        b3_t = tags[buffer[2]] if len(buffer) > 2 else tag_pad
        s1_w = words[stack[-1]] if stack else word_pad
        s1_t = tags[stack[-1]] if stack else tag_pad
        s2_w = words[stack[-2]] if len(stack) > 1 else word_pad
        s2_t = tags[stack[-2]] if len(stack) > 1 else tag_pad
        s3_w = words[stack[-3]] if len(stack) > 2 else word_pad
        s3_t = tags[stack[-3]] if len(stack) > 2 else tag_pad

        def is_parent(parent, child):
            if child == 0:
                return False
            if parent == child:
                return True
            return is_parent(parent, parse[child])

        # Leftmost descendant of a node (or the node itself)
        def lc1(parent):
            for i in range(0, len(words)):
                if is_parent(parent, i):
                    return i
            return -1

        # Rightmost descendant of a node (or the node itself)
        def rc1(parent):
            for i in range(len(words) - 1, -1, -1):
                if is_parent(parent, i):
                    return i
            return -1

        lc1_s1 = lc1(stack[-1]) if stack else -1
        rc1_s1 = rc1(stack[-1]) if stack else -1
        lc1_s2 = lc1(stack[-2]) if len(stack) > 1 else -1
        rc1_s2 = rc1(stack[-2]) if len(stack) > 1 else -1
        lc1_s1_t = tags[lc1_s1] if lc1_s1 >= 0 else tag_pad
        rc1_s1_t = tags[rc1_s1] if rc1_s1 >= 0 else tag_pad
        lc1_s2_t = tags[lc1_s2] if lc1_s2 >= 0 else tag_pad
        rc1_s2_t = tags[rc1_s2] if rc1_s2 >= 0 else tag_pad
        lc1_s1_w = words[lc1_s1] if lc1_s1 >= 0 else word_pad
        rc1_s1_w = words[rc1_s1] if rc1_s1 >= 0 else word_pad
        lc1_s2_w = words[lc1_s2] if lc1_s2 >= 0 else word_pad
        rc1_s2_w = words[rc1_s2] if rc1_s2 >= 0 else word_pad
        lc1_lc1_s1 = lc1(lc1_s1) if lc1_s1 >= 0 else -1
        lc1_lc1_s2 = lc1(lc1_s2) if lc1_s2 >= 0 else -1
        rc1_rc1_s1 = rc1(rc1_s1) if rc1_s1 >= 0 else -1
        rc1_rc1_s2 = rc1(rc1_s2) if rc1_s2 >= 0 else -1
        lc1_lc1_s1_t = tags[lc1_lc1_s1] if lc1_lc1_s1 >= 0 else tag_pad
        lc1_lc1_s2_t = tags[lc1_lc1_s2] if lc1_lc1_s2 >= 0 else tag_pad
        rc1_rc1_s1_t = tags[rc1_rc1_s1] if rc1_rc1_s1 >= 0 else tag_pad
        rc1_rc1_s2_t = tags[rc1_rc1_s2] if rc1_rc1_s2 >= 0 else tag_pad
        lc1_lc1_s1_w = words[lc1_lc1_s1] if lc1_lc1_s1 >= 0 else word_pad
        lc1_lc1_s2_w = words[lc1_lc1_s2] if lc1_lc1_s2 >= 0 else word_pad
        rc1_rc1_s1_w = words[rc1_rc1_s1] if rc1_rc1_s1 >= 0 else word_pad
        rc1_rc1_s2_w = words[rc1_rc1_s2] if rc1_rc1_s2 >= 0 else word_pad

        feat = [b1_w, b2_w, b3_w, s1_w, s2_w, s3_w, lc1_s1_w, rc1_s1_w,
                lc1_s2_w, rc1_s2_w, lc1_lc1_s1_w, lc1_lc1_s2_w,
                rc1_rc1_s1_w, rc1_rc1_s2_w, b1_t, b2_t, b3_t, s1_t, s2_t,
                s3_t, lc1_s1_t, rc1_s1_t, lc1_s2_t, rc1_s2_t, lc1_lc1_s1_t,
                lc1_lc1_s2_t, rc1_rc1_s1_t, rc1_rc1_s2_t]
        # Smaller alternative feature set:
        # feat = [b1_w, b2_w, b3_w, s1_w, s2_w, s3_w, lc1_s1_w, rc1_s1_w,
        #         lc1_s2_w, rc1_s2_w, b1_t, b2_t, b3_t, s1_t, s2_t, s3_t,
        #         lc1_s1_t, rc1_s1_t, lc1_s2_t, rc1_s2_t]
        return feat
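    # A worked sketch of the three moves, assuming the toy configuration
    # i = 3, stack = [0, 1, 2], pred_tree = [0, 0, 0, 0] (head indices for
    # a four-word sentence):
    #   SHIFT     (0): push i and advance -> i = 4, stack = [0, 1, 2, 3]
    #   LEFT-ARC  (1): stack[-1] becomes head of stack[-2]
    #                                     -> pred_tree[1] = 2, stack = [0, 2]
    #   RIGHT-ARC (2): stack[-2] becomes head of stack[-1]
    #                                     -> pred_tree[2] = 1, stack = [0, 1]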
    def train(self, train=False):
        """Trains the parser on the loaded training data.

        Args:
            train: If False, restore a previously trained model instead of
                training a new one.
        """
        # Find pad values
        word_pad_id = self.word_dict['<PAD/>']
        tag_pad_id = self.tag_dict['<PAD/>']

        # Predict tags for the training data using the tagger
        for sent_id in range(self.tags.shape[0]):
            words = self.word_ids[sent_id, :int(self.sent_lens[sent_id])]
            words = words.tolist()
            words = [str(int(w)) for w in words]
            words = [self.word_dict_inv[w] for w in words]
            tag_pred = self.tagger.tag(words)
            tag_pred = [self.tag_dict[w] for w in tag_pred]
            self.tags[sent_id, :int(self.sent_lens[sent_id])] = tag_pred

        x_parser, y_parser = self.generate_data_for_parser(
            self.word_ids, self.tags, self.gold_tree, self.sent_lens,
            tag_pad_id, word_pad_id)
        self.classifier = NeuralNetwork(self.config, self.x_embeddings,
                                        self.tags_embeddings, 3,
                                        feature_type='parser_1')
        if not train:
            self.classifier.restore_sess()
        else:
            # Hold out the last 1% of the generated configurations as a
            # development set
            num_data = x_parser.shape[0]
            num_train = int(num_data * 0.99)
            x_train_parser = x_parser[:num_train, :]
            y_train_parser = y_parser[:num_train]
            x_dev_parser = x_parser[num_train:, :]
            y_dev_parser = y_parser[num_train:]
            self.classifier.train(x_train_parser, y_train_parser,
                                  x_dev_parser, y_dev_parser)

    def gold_move(self, i, stack, pred_tree, gold_tree, sent_len):
        """Returns the gold-standard move (static oracle) for the
        specified configuration, or None if parsing is finished."""
        if len(stack) < 3 and i < sent_len:
            return 0
        elif len(stack) <= 1 and i >= sent_len:
            return None
        elif stack[-1] == gold_tree[stack[-2]]:
            # LEFT-ARC is correct only if stack[-2] has already collected
            # all of its children
            left_arc = True
            for t in range(len(gold_tree)):
                if gold_tree[t] == stack[-2]:
                    if pred_tree[t] != stack[-2]:
                        left_arc = False
            if left_arc:
                return 1
        elif stack[-2] == gold_tree[stack[-1]]:
            # RIGHT-ARC is correct only if stack[-1] has already collected
            # all of its children
            right_arc = True
            for t in range(len(gold_tree)):
                if gold_tree[t] == stack[-1]:
                    if pred_tree[t] != stack[-1]:
                        right_arc = False
            if right_arc:
                return 2
        return 0

    def generate_data_for_parser(self, x, tags, trees, sent_lens, tag_pad,
                                 word_pad):
        """Runs the oracle over all training sentences and collects
        (feature vector, gold move) pairs."""
        x_parser = []
        y_parser = []
        for i in range(x.shape[0]):
            sent = x[i, :].tolist()
            tag = tags[i, :].tolist()
            tree = trees[i, :].tolist()
            # Prepend a padding token to act as the root
            sent = [word_pad] + sent[0:int(sent_lens[i])]
            tag = [tag_pad] + tag[0:int(sent_lens[i])]
            tree = [0] + tree[0:int(sent_lens[i])]
            tree = [int(t) for t in tree]
            pred_tree = [0] * len(tree)
            stack = []
            buffer_pos = 0
            while True:
                move_to_do = self.gold_move(buffer_pos, stack, pred_tree,
                                            tree, len(sent))
                if move_to_do is None:
                    break
                feat = self.features(sent, tag,
                                     range(buffer_pos, len(sent)), stack,
                                     pred_tree, tag_pad, word_pad)
                x_parser.append(feat)
                y_parser.append(move_to_do)
                (buffer_pos, stack, pred_tree) = self.move(
                    buffer_pos, stack, pred_tree, move_to_do)
        x_parser = np.array(x_parser, dtype=np.float32)
        y_parser = np.array(y_parser, dtype=np.float32)
        return x_parser, y_parser
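# A minimal sketch of the static oracle on a toy sentence, assuming a
# constructed parser (ParserNN requires processed data on disk, so the
# snippet is illustrative only). Gold heads [0, 0, 1] attach w1 to the
# root and w2 to w1:
#
#   gold_tree = [0, 0, 1]
#   i, stack, pred_tree = 0, [], [0, 0, 0]
#   while True:
#       move = parser.gold_move(i, stack, pred_tree, gold_tree, 3)
#       if move is None:
#           break
#       i, stack, pred_tree = parser.move(i, stack, pred_tree, move)
#   assert pred_tree == gold_tree   # SHIFT x3, then RIGHT-ARC x2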