def __init__(self, model_files=(), config=None, beam=1):
    """
    Create a parser wrapping one Model per given model file.

    :param model_files: a single filename, or an iterable of filenames; when empty,
                        the classifier name from config.args.classifier is used instead
    :param config: configuration object; a Config() instance is used when omitted
    :param beam: beam size (stored, but currently unused)
    """
    # Resolve the configuration first: the fallback below reads config.args, which
    # raised AttributeError whenever config was None and no model files were given.
    config = config or Config()
    if isinstance(model_files, str):
        model_files = (model_files,)
    super().__init__(config=config,
                     models=list(map(Model, model_files or (config.args.classifier,))))
    self.beam = beam  # Currently unused
    self.best_score = self.dev = self.test = self.iteration = self.epoch = self.batch = None
    self.trained = self.save_init = False
    self.accuracies = {}
def average_f1(scores, eval_type=None):
    """
    Return the average F1 of the given scores for the first evaluation type that works.

    :param scores: object exposing average_f1(eval_type)
    :param eval_type: evaluation type to try first; when falsy, get_eval_type(scores) is used
    :return: the first average F1 computed without a ValueError, or 0 if every type fails
    """
    first_choice = eval_type or get_eval_type(scores)
    candidates = (first_choice,) + EVAL_TYPES
    for candidate in candidates:
        try:
            return scores.average_f1(candidate)
        except ValueError as err:
            # Report the failing type and fall through to the next candidate
            Config().print([candidate, str(err)], level=0)
    return 0
def load_extra(self, d):
    """
    Restore network-shape settings from a saved dict onto both the config and this object.

    :param d: dict of saved values; "lstm_layers" is required, the other keys
              fall back to the value currently held in Config().args
    """
    args = Config().args
    # The only mandatory key: a missing "lstm_layers" raises KeyError
    args.lstm_layers = self.lstm_layers = d["lstm_layers"]
    optional_keys = ("lstm_layer_dim", "embedding_layers", "embedding_layer_dim", "max_length")
    for key in optional_keys:
        value = d.get(key, getattr(args, key))
        setattr(args, key, value)
        setattr(self, key, value)
def add(self):
    """
    Attach this edge to its endpoints: register it as outgoing on the parent
    and as incoming on the child.

    Fails an assertion if the edge has no tag or is a self-loop; when
    Config().args.verify is set, also if the edge is already attached to
    either endpoint or the parent is among the child's descendants.
    """
    parent, child = self.parent, self.child
    assert self.tag is not None, "No tag given for new edge %s -> %s" % (parent, child)
    assert parent is not child, "Trying to create self-loop edge on %s" % parent
    if Config().args.verify:
        assert self not in parent.outgoing, "Trying to create outgoing edge twice: %s" % self
        assert self not in child.incoming, "Trying to create incoming edge twice: %s" % self
        assert parent not in child.descendants, "Detected cycle created by edge: %s" % self
    parent.add_outgoing(self)
    child.add_incoming(self)
def read_passages(args, files):
    """
    Expand filename patterns and read passages from the resulting files/directories.

    :param args: object with `sentences` and `paragraphs` attributes, passed on to the reader
    :param files: iterable of filenames or glob patterns
    :return: whatever ioutil.read_files_and_dirs returns for the expanded list
    """
    expanded = []
    for pattern in files:
        # A pattern that matches nothing is kept as-is
        expanded.extend(sorted(glob(pattern)) or (pattern,))
    return ioutil.read_files_and_dirs(
        expanded,
        sentences=args.sentences,
        paragraphs=args.paragraphs,
        converters=CONVERTERS,
        lang=Config().args.lang,
    )
def get_ucca_passage(sentence):
    """
    Parse a single sentence with the module-level PARSER and return the resulting passage.

    The sentence is written to a temporary text file, the configuration is reloaded
    as if that file were the only command-line argument, and the file is parsed.

    :param sentence: text to parse
    :return: the single passage produced by PARSER.parse
    """
    # TemporaryDirectory removes the scratch directory on exit; the previous
    # mkdtemp() call left one directory behind for every parsed sentence.
    with tempfile.TemporaryDirectory() as scratch_dir:
        source_path = Path(scratch_dir) / 'source.txt'
        with source_path.open('w') as f:
            f.write(sentence + '\n')
        argv = ['script_name', str(source_path)]
        with unittest.mock.patch('sys.argv', argv):
            Config.reload()
            args = Config().args
            # Only the passages to parse are needed; train/dev were read but never used.
            test_passages = read_passages(args, args.passages)
            # Consume the parser output while the source file still exists
            ucca_passages = [
                ucca_passage for (ucca_passage, ) in PARSER.parse(
                    test_passages, evaluate=[], display=False, write=False)
            ]
    assert len(ucca_passages) == 1
    return ucca_passages[0]
def main_generator():
    """
    Run training/testing according to Config().args and yield the resulting scores.

    Without --folds: yields everything train_test yields for the given
    train/dev/test passages. With --folds: shuffles the passages, splits them
    into folds, runs train_test per fold (fold i as dev, fold i+1 as test, the
    rest as train) and yields one aggregated Scores object if any fold produced
    a final score.
    """
    args = Config().args
    assert args.passages or args.train, "Either passages or --train is required (use -h for help)"
    assert args.models or args.train or args.folds, "Either --model or --train or --folds is required"
    assert not (args.train or args.dev) or not args.folds, "--train and --dev are incompatible with --folds"
    assert args.train or not args.dev, "--dev is only possible together with --train"

    if not args.folds:
        # Simple train/dev/test by given arguments
        train_passages = read_passages(args, args.train)
        dev_passages = read_passages(args, args.dev)
        test_passages = read_passages(args, args.passages)
        yield from train_test(train_passages, dev_passages, test_passages, args)
        return

    num_folds = args.folds
    all_passages = list(read_passages(args, args.passages))
    assert len(all_passages) >= num_folds, \
        "%d folds are not possible with only %d passages" % (num_folds, len(all_passages))
    Config().random.shuffle(all_passages)
    folds = [all_passages[offset::num_folds] for offset in range(num_folds)]

    fold_scores = []
    for index, dev_passages in enumerate(folds):
        print("Fold %d of %d:" % (index + 1, num_folds))
        test_passages = folds[(index + 1) % num_folds]
        train_passages = []
        for fold in folds:
            if fold is not dev_passages and fold is not test_passages:
                train_passages.extend(fold)
        results = list(train_test(train_passages, dev_passages, test_passages, args, "_%d" % index))
        # Only the last yielded item (the test scores) counts for the fold
        if results and results[-1] is not None:
            fold_scores.append(results[-1])

    if fold_scores:
        scores = Scores(fold_scores)
        per_fold = ", ".join("%.3f" % average_f1(fold_score) for fold_score in fold_scores)
        print("Average test F1 score for each fold: " + per_fold)
        print("Aggregated scores across folds:\n")
        scores.print()
        yield scores
def __init__(self, passage):
    """
    Record which layer-1 node IDs and which edges of the passage are still remaining.

    Nodes and edges are filtered by the linkage, implicit and remote flags of
    Config().args; the first head of layer 1 is never included among the nodes.

    :param passage: passage object providing layer() and a `nodes` mapping
    """
    args = Config().args
    l1 = passage.layer(layer1.LAYER_ID)
    link_edge_tags = (layer1.EdgeTags.LinkRelation, layer1.EdgeTags.LinkArgument)

    def keep_node(node):
        return (node is not l1.heads[0]
                and (args.linkage or node.tag != layer1.NodeTags.Linkage)
                and (args.implicit or not node.attrib.get("implicit")))

    def keep_edge(edge):
        return ((args.linkage or edge.tag not in link_edge_tags)
                and (args.implicit or not edge.child.attrib.get("implicit"))
                and (args.remote or not edge.attrib.get("remote")))

    self.nodes_remaining = {node.ID for node in l1.all if keep_node(node)}
    self.edges_remaining = {
        edge
        for node in passage.nodes.values()
        for edge in node
        if keep_edge(edge)
    }
    self.passage = passage
    self.edge_found = False
    self.log = None
def main():
    """
    Random hyperparameter search: sample parameter combinations, run each one and
    append its scores to a CSV file, then print the best-scoring combination.

    Environment variables read:
      PARAMS_FILE     output CSV path (default "params.csv")
      PARAMS_NUM      number of combinations to sample (default 30)
      WORD_VECTORS*   values of all variables with this prefix are word vector file options
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
    os.makedirs(MODELS_DIR, exist_ok=True)
    Config().args.no_write = True
    out_file = os.environ.get("PARAMS_FILE", "params.csv")
    word_vectors_files = [os.environ[f] for f in os.environ if f.startswith("WORD_VECTORS")]
    num = int(os.environ.get("PARAMS_NUM", 30))
    np.random.seed()
    domains = (
        ("seed", 2147483647),  # max value for int
        ("classifier", (config.MLP_NN, config.BILSTM_NN)),
        ("update_word_vectors", [True, False]),
        ("word_vectors", [None] + word_vectors_files),
        ("word_dim_external", (0, 300)),
        ("word_dim", (0, 50, 100, 200, 300)),
        ("tag_dim", (5, 10, 20)),
        ("dep_dim", (5, 10, 20)),
        ("label_dim", (5, 10, 20)),
        ("punct_dim", (1, 2, 3)),
        ("gap_dim", (1, 2, 3)),
        ("action_dim", (3, 5, 10)),
        ("layer_dim", (50, 100, 200, 300, 500, 1000)),
        ("layers", [1] + 5 * [2]),
        ("lstm_layer_dim", (50, 100, 200, 300, 500, 1000)),
        ("lstm_layers", [1] + 5 * [2]),
        ("embedding_layer_dim", (50, 100, 200, 300, 500, 1000)),
        ("embedding_layers", 5 * [1] + [2]),
        ("activation", config.ACTIVATIONS),
        ("init", 5 * [config.INITIALIZATIONS[0]] + list(config.INITIALIZATIONS)),
        ("batch_size", (10, 30, 50, 100, 200, 500)),
        ("minibatch_size", (50, 100, 200, 300, 500, 1000)),
        ("optimizer", 5 * [config.OPTIMIZERS[0]] + list(config.OPTIMIZERS)),
        ("swap_importance", (1, 2)),
        ("iterations", range(1, 51)),
        ("word_dropout", (0, .1, .2, .25, .3)),
        ("word_dropout_external", (0, .1, .2, .25, .3)),
        ("dynet_weight_decay", (1e-7, 1e-6, 1e-5, 1e-4)),
        ("dropout", (0, .1, .2, .3, .4, .5)),
    )
    # For every domain draw `num` values; NumPy scalars are unwrapped with .item().
    # Zipping the per-domain samples yields `num` complete parameter combinations.
    samples_per_domain = [
        [(n, v.item() if hasattr(v, "item") else v) for v in np.random.choice(vs, num)]
        for n, vs in domains
    ]
    params = [Params(OrderedDict(p)) for p in zip(*samples_per_domain)]
    print("All parameter combinations to try:")
    print("\n".join(map(str, params)))
    print("Saving results to '%s'" % out_file)
    # The csv module requires files opened with newline="" so that the writer's own
    # line terminator is not translated a second time by the text layer.
    with open(out_file, "w", newline="") as f:
        csv.writer(f).writerow(params[0].get_field_titles())
    for param in params:
        param.run()
        # Re-open per run so results already written survive a later failure
        with open(out_file, "a", newline="") as f:
            param.write_scores(csv.writer(f))
    best = max(params, key=Params.score)
    print("Best parameters: %s" % best)
def add_node(self, *args, **kwargs):
    """
    Called during parsing to add a new Node (not core.Node) to the temporary representation.

    The new node's index is the current number of nodes; the addition is recorded in self.log.
    :param args: ordinal arguments for Node()
    :param kwargs: keyword arguments for Node()
    :return: the newly created node
    """
    next_index = len(self.nodes)
    created = Node(next_index, *args, **kwargs)
    if Config().args.verify:
        assert created not in self.nodes, "Node already exists"
    self.nodes.append(created)
    self.log.append("node: %s" % created)
    return created
def set_node(self, state, node_dropout=0):
    """
    Resolve the node this object refers to in the given state and store it in self.node.

    The starting node is taken from the state's stack ("s"), buffer ("b") or
    actions (any other source) at self.index; each character of self.relatives
    then moves to a parent (uppercase) or child (lowercase), rightmost for
    "r"/"R" and leftmost otherwise.

    :param state: parser state, or None to leave self.node as None
    :param node_dropout: probability-like threshold compared against a random sample;
                         when the sample is lower, self.node is left as None
    """
    self.node = None
    if state is None:
        return
    if node_dropout and node_dropout > Config().random.random_sample():
        return
    try:
        source = self.source
        if source == "s":
            self.node = state.stack[-1 - self.index]
        elif source == "b":
            self.node = state.buffer[self.index]
        else:  # source == "a"
            self.node = state.actions[-1 - self.index]
        for relative in self.relatives:
            going_up = relative.isupper()
            nodes = self.node.parents if going_up else self.node.children
            if relative.lower() != "r":  # relative.lower() == "l"
                self.node = nodes[0]
                continue
            if len(nodes) == 1:
                raise ValueError("Avoiding identical right and left relatives")
            self.node = nodes[-1]
    except (IndexError, TypeError, AttributeError, ValueError):
        # On failure the last successfully resolved node is kept, unless missing
        # node features are enabled or the dropout sample says to clear it
        clear = Config().args.missing_node_features or (
            node_dropout and node_dropout > Config().random.random_sample())
        if clear:
            self.node = None
def assert_possible_edge():
    """
    Assert that the edge the current action would create is allowed.

    Checks the parent and child individually, then the root constraints
    (only when Config().args.constraints is set), and finally that the edge
    neither already exists nor closes a cycle.
    """
    parent, child = self.get_parent_child(action)
    assert_possible_parent(parent)
    assert_possible_child(child)
    root_is_constrained = parent is self.root and Config().args.constraints
    if root_is_constrained:
        assert child.text is None, "Root may not have terminal children, but is being added '%s'" % child
        assert action.tag in Constraints.TopLevel, "The root may not have %s edges" % action.tag
    # NOTE: a former "multiple_edges" option, which compared whole Edge objects
    # against parent.outgoing, was removed because it was not useful; only the
    # parent/child pair is checked now.
    assert child not in parent.children, "Edge must not already exist: %s->%s" % (parent, child)
    assert parent not in child.descendants, "Detected cycle created by edge: %s->%s" % (parent, child)
def score(self, features):
    """
    Calculate score for each label.

    Before any training iteration has completed, a zero vector is returned.
    :param features: extracted feature values, of size input_size
    :return: array with score for each label
    """
    super(NeuralNetwork, self).score(features)
    if self._iteration > 0:
        values = self.evaluate(features).npvalue()
        return values[:self.num_labels]
    if Config().args.verbose >= 2:
        print(" no updates done yet, returning zero vector.")
    return np.zeros(self.num_labels)
def __init__(self, model_type, filename, labels, input_params=None, model=None):
    """
    Set up the common classifier state.

    :param model_type: stored as self.model_type
    :param filename: stored as self.filename
    :param labels: tuple of lists of labels that can be updated later to add new labels
    :param input_params: dict of feature type name -> FeatureInformation
    :param model: if given, the classifier is marked as frozen (copy of a trained model)
    """
    args = Config().args
    self.model = None
    self.model_type = model_type
    self.filename = filename
    self.labels = tuple(labels)
    self.input_params = input_params
    # Snapshot of num_labels taken once the labels are in place
    self._num_labels = self.num_labels
    self.input_dim = None
    self.is_frozen = model is not None
    self.updates = 0
    self.epoch = 0
    self.learning_rate = args.learning_rate
    self.learning_rate_decay = args.learning_rate_decay
def update(self, features, pred, true, importance=1):
    """
    Update classifier weights according to predicted and true labels.

    One loss term (the evaluated output picked at the true label) is queued in
    self._losses per unit of importance. With Config().args.dynet_viz set, the
    computation graph is printed and the process exits.

    :param features: extracted feature values, of size input_size
    :param pred: label predicted by the classifier (non-negative integer less than num_labels)
    :param true: true label (non-negative integer less than num_labels)
    :param importance: add this many samples with the same features
    """
    super(NeuralNetwork, self).update(features, pred, true, importance)
    repetitions = int(importance)
    for _ in range(repetitions):
        output = self.evaluate(features, train=True)
        self._losses.append(dy.pick(output, true))
        if Config().args.dynet_viz:
            dy.print_graphviz()
            sys.exit(0)
def init_data(self, param):
    """
    Initialize the data store of a feature parameters object, in place.

    Nothing is done when param.data is already set or when param is a
    NumericFeatureParameters instance.
    :param param: feature parameters object; its num, data and empty attributes are set
    """
    if param.data is not None or isinstance(param, NumericFeatureParameters):
        return
    param.num = self.feature_extractor.num_features_non_numeric(
        param.effective_suffix)
    if param.dim:
        if param.external:
            # Externally provided vectors; zeros are passed as the second argument
            # (presumably the value for unknown keys - confirm against UnknownDict)
            vectors = self.get_word_vectors(param)
            param.data = UnknownDict(vectors, np.zeros(param.dim))
        else:
            # Each new key gets a random normal vector of size param.dim;
            # the dimension is bound as a default argument of the lambda
            param.data = defaultdict(
                lambda d=param.dim: Config().random.normal(size=d))
            _ = param.data[UnknownDict.UNKNOWN]  # Initialize unknown value
        # NOTE(review): nesting of the line above and the line below was
        # reconstructed from whitespace-collapsed source - confirm against the original
        param.empty = np.zeros(param.dim, dtype=float)
def assert_possible_parent(node):
    """
    Assert that the given node may receive a new child through the current action.

    Terminals and implicit nodes may never have children. When
    Config().args.constraints is set, the outgoing-tag constraints
    (unique, mutually exclusive, childless) are checked as well.

    :param node: candidate parent node
    """
    assert node.text is None, "Terminals may not have children: %s" % node.text
    # Report the node actually being checked: the message used to format `s0`,
    # a name that is not defined here and need not be the candidate parent.
    assert not node.implicit, "Implicit nodes may not have children: %s" % node
    if Config().args.constraints:
        assert action.tag not in Constraints.UniqueOutgoing or action.tag not in node.outgoing_tags, \
            "Outgoing edge tag %s must be unique, but %s already has one" % (
                action.tag, node)
        assert action.tag not in Constraints.MutuallyExclusiveOutgoing or not \
            node.outgoing_tags & Constraints.MutuallyExclusiveOutgoing, \
            "Outgoing edge tags %s are mutually exclusive, but %s already has %s and is being added %s" % (
                Constraints.MutuallyExclusiveOutgoing, node, node.outgoing_tags, action.tag)
        assert action.tag in Constraints.ChildlessOutgoing or not \
            node.incoming_tags & Constraints.ChildlessIncoming, \
            "Units with incoming %s edges may not have children, and %s has incoming %s" % (
                Constraints.ChildlessIncoming, node, node.incoming_tags)
def score(self, features, axis):
    """
    Calculate score for each label of the given axis.

    Before any update has been made, a zero vector is returned.
    :param features: extracted feature values, of size input_size
    :param axis: axis of the label we are predicting
    :return: array with score for each label
    """
    super(NeuralNetwork, self).score(features, axis)
    if self.updates > 0:
        values = self.evaluate(features, axis).npvalue()
        return values[:self.num_labels[axis]]
    if Config().args.verbose > 2:
        print(" no updates done yet, returning zero vector.")
    return np.zeros(self.num_labels[axis])
def finalize(self, finished_epoch=False):
    """
    Fit this model on collected samples.

    If any losses were queued, their negated sum is propagated forward and
    backward, the trainer is updated, the computation graph is re-initialized
    and the iteration counter is advanced.

    :param finished_epoch: whether to also notify the trainer that an epoch ended
    :return self
    """
    super(NeuralNetwork, self).finalize()
    if self.model is None:
        self.init_model()
    pending = self._losses
    if pending:
        total = -dy.esum(pending)
        total.forward()
        if Config().args.verbose >= 2:
            print("Total loss from %d time steps: %g" % (len(pending), total.value()))
        total.backward()
        self._trainer.update()
        self.init_cg()
        self._losses = []
        self._iteration += 1
    if finished_epoch:
        self._trainer.update_epoch()
    if Config().args.verbose:
        self._trainer.status()
    return self
def __init__(self, model_file=None, model_type=None, beam=1):
    """
    Create a parser around a Model built from the given type and file.

    :param model_file: passed to Model as its second argument
    :param model_type: passed to Model as its first argument
    :param beam: beam size (stored, but currently unused)
    """
    self.args = Config().args
    self.state = None  # State object created at each parse
    self.oracle = None  # Oracle object created at each parse
    # Per-passage and overall counters for actions
    self.action_count = 0
    self.correct_action_count = 0
    self.total_actions = 0
    self.total_correct_actions = 0
    # Per-passage and overall counters for labels
    self.label_count = 0
    self.correct_label_count = 0
    self.total_labels = 0
    self.total_correct_labels = 0
    self.model = Model(model_type, model_file)
    classifier_properties = self.model.model.get_classifier_properties()
    self.update_only_on_error = ClassifierProperty.update_only_on_error in classifier_properties
    self.beam = beam  # Currently unused
    self.state_hash_history = None  # For loop checking
    # Used in verify_passage to optionally ignore a mismatch in linkage nodes:
    if self.args.linkage:
        self.ignore_node = None
    else:
        self.ignore_node = lambda n: n.tag == layer1.NodeTags.Linkage
    self.best_score = self.dev = self.iteration = self.eval_index = None
    self.trained = False
def run(self, out_file):
    """
    Run the parser with this object's parameters and append one CSV row per
    scores object yielded by parse.main_generator().

    A title row is written first whenever out_file does not exist yet. In each
    row the "iterations" parameter is replaced by the 1-based index of the
    yielded scores.

    :param out_file: path of the CSV file to append to
    """
    args = Config().args
    assert args.train and (args.passages or args.dev) or \
        args.passages and args.folds, "insufficient parameters given to parser"
    print("Running with %s" % self)
    Config().update(self.params)
    Config().update_hyperparams(**self.hyperparams)
    for i, self.scores in enumerate(parse.main_generator(), start=1):
        print_title = not os.path.exists(out_file)
        # The csv module requires newline="" so that the writer's own line
        # terminator is not translated again by the text layer.
        with open(out_file, "a", newline="") as f:
            writer = csv.writer(f)
            if print_title:
                writer.writerow(list(self.all_params.keys()) +
                                ["average_labeled_f1"] + self.scores.titles())
            writer.writerow([str(i if n == "iterations" else p)
                             for n, p in self.all_params.items()] +
                            [str(self.scores.average_f1())] + self.scores.fields())
def update(self, features, axis, pred, true, importance=1):
    """
    Update classifier weights according to predicted and true labels.

    One loss term (the evaluated output picked at the true label) is queued in
    self.losses per unit of importance. With Config().args.dynet_viz set, the
    computation graph is printed and the process exits.

    :param features: extracted feature values, in the form of a dict (name: value)
    :param axis: axis of the label we are predicting
    :param pred: label predicted by the classifier (non-negative integer bounded by num_labels[axis])
    :param true: true label (non-negative integer bounded by num_labels[axis])
    :param importance: add this many samples with the same features
    """
    super(NeuralNetwork, self).update(features, axis, pred, true, importance)
    repetitions = int(importance)
    for _ in range(repetitions):
        output = self.evaluate(features, axis, train=True)
        self.losses.append(dy.pick(output, true))
        if Config().args.dynet_viz:
            dy.print_graphviz()
            sys.exit(0)
def __init__(self, model_file=None, model_type=None, beam=1):
    """
    Create a parser around a Model built from the given type, file and all actions.

    :param model_file: passed to Model as its second argument
    :param model_type: passed to Model as its first argument
    :param beam: beam size (stored, but currently unused)
    """
    self.state = None  # State object created at each parse
    self.oracle = None  # Oracle object created at each parse
    self.scores = None  # NumPy array of action scores at each action
    self.action_count = self.correct_count = 0
    self.total_actions = self.total_correct = 0
    self.model = Model(model_type, model_file, Actions().all)
    self.beam = beam  # Currently unused
    self.state_hash_history = None  # For loop checking
    # Used in verify_passage to optionally ignore a mismatch in linkage nodes:
    if Config().args.linkage:
        self.ignore_node = None
    else:
        self.ignore_node = lambda n: n.tag == layer1.NodeTags.Linkage
    self.best_score = self.dev = self.iteration = self.eval_index = None
    self.dev_scores = []
    self.trained = False
def __init__(self, *args, model=None, epoch=0):
    """
    Create a new untrained Perceptron or copy the weights from an existing one.

    The minimum number of updates for a feature to be used in scoring is read
    from Config().args.min_update.

    :param args: positional arguments forwarded to the base class after the model type
    :param model: if given, copy the weights (from a trained model)
    :param epoch: forwarded to the base class
    """
    super(SparsePerceptron, self).__init__(SPARSE_PERCEPTRON, *args, model=model, epoch=epoch)
    # Unseen features get fresh weights sized by the current number of labels
    weights = defaultdict(lambda: FeatureWeights(self.num_labels))
    if self.is_frozen:
        weights.update(self.model)
    self.model = weights
    self._min_update = Config().args.min_update  # Minimum number of updates for a feature to be used in scoring
def create_config():
    """
    Build a Config from the arguments ("", "-m", "test") with small dimensions
    and vocabulary limits, plus per-format hyperparameter overrides.

    :return: the configured Config object
    """
    overrides = {
        "verbose": 2,
        "timeout": 1,
        "embedding_layer_dim": 1,
        "ner_dim": 1,
        "action_dim": 1,
        "lemma_dim": 1,
        "max_words_external": 3,
        "word_dim_external": 1,
        "word_dim": 1,
        "max_words": 3,
        "max_lemmas": 3,
        "max_tags": 3,
        "max_pos": 3,
        "max_deps": 3,
        "max_edge_labels": 3,
        "max_puncts": 3,
        "max_action_types": 3,
        "max_ner_types": 3,
        "edge_label_dim": 1,
        "tag_dim": 1,
        "pos_dim": 1,
        "dep_dim": 1,
        "optimizer": "sgd",
        "output_dim": 1,
        "layer_dim": 2,
        "layers": 3,
        "lstm_layer_dim": 2,
        "lstm_layers": 3,
        "max_action_ratio": 10,
        "update_word_vectors": False,
        "copy_shared": None,
    }
    amr_hyperparams = {
        "max_node_labels": 3,
        "max_node_categories": 3,
        "node_label_dim": 1,
        "node_category_dim": 1,
    }
    cfg = Config("", "-m", "test")
    cfg.update(overrides)
    cfg.update_hyperparams(
        shared={"lstm_layer_dim": 2, "lstm_layers": 1},
        ucca={"word_dim": 2},
        amr=amr_hyperparams,
    )
    return cfg
def train_test(train_passages, dev_passages, test_passages, args, model_suffix=""):
    """
    Train and test parser on given passages.

    :param train_passages: passages to train on
    :param dev_passages: passages to evaluate on every iteration
    :param test_passages: passages to test on after training
    :param args: extra arguments
    :param model_suffix: string to append to model filename before file extension
    :return: generator of Scores objects: the truthy items yielded by training,
             and finally test scores (if test passages produced any)
    """
    model_files = []
    for name in args.models or (args.classifier,):
        base, ext = os.path.splitext(name)
        model_files.append(base + model_suffix + ext)
    parser = Parser(model_files=model_files, config=Config(), beam=args.beam)
    training = parser.train(train_passages, dev=dev_passages, test=test_passages,
                            iterations=args.iterations)
    yield from filter(None, training)

    if not test_passages:
        return
    if args.train or args.folds:
        print("Evaluating on test passages")
    passage_scores = []
    evaluate = args.evaluate or train_passages
    for result in parser.parse(test_passages, evaluate=evaluate, write=args.write):
        # The first element is the parsed passage; anything after it is a score
        _passage, *score = result
        passage_scores.extend(score)
    if not passage_scores:
        return
    scores = Scores(passage_scores)
    if args.verbose <= 1 or len(passage_scores) > 1:
        print("\nAverage %s F1 score on test: %.3f" % (get_eval_type(scores), average_f1(scores)))
        print("Aggregated scores:")
        scores.print()
    print_scores(scores, args.testscores)
    yield scores
def parse_passage(self, train):
    """
    Internal method to parse a single passage.

    Repeatedly extracts features from the current state, chooses an action,
    applies it to the state and, when the state asks for it, chooses and
    applies a node label, until the state reports it is finished.

    :param train: use oracle to train on given passages, or just parse with classifier?
    :raises ParserException: if applying the chosen action fails an assertion
    """
    if self.args.verbose > 1:
        print(" initial state: %s" % self.state)
    while True:
        if self.args.check_loops:
            self.check_loop()
        features = self.model.feature_extractor.extract_features(
            self.state)
        true_actions = self.get_true_actions(train)
        action, predicted_action = self.choose_action(
            features, train, true_actions)
        try:
            self.state.transition(action)
        except AssertionError as e:
            # Wrap assertion failures from the transition, keeping the cause chained
            raise ParserException("Invalid transition: %s %s" % (action, self.state)) from e
        if self.args.verbose > 1:
            if self.oracle:
                print(" predicted: %-15s true: %-15s taken: %-15s %s" %
                      (predicted_action, "|".join(
                          map(str, true_actions.values())), action, self.state))
            else:
                print(" action: %-15s %s" % (action, self.state))
        if self.state.need_label:  # Label action that requires a choice of label
            true_label = self.get_true_label(action.orig_node)
            label, predicted_label = self.choose_label(
                features, train, true_label)
            self.state.label_node(label)
            if self.args.verbose > 1:
                if self.oracle and not Config().args.use_gold_node_labels:
                    print(" predicted label: %-15s true label: %-15s" %
                          (predicted_label, true_label))
                else:
                    print(" label: %-15s" % label)
        # Notify the underlying classifier that one parsing step is done
        self.model.model.finished_step(train)
        if self.args.verbose > 1:
            # Echo whatever the state logged during this step
            for line in self.state.log:
                print(" " + line)
        if self.state.finished:
            return  # action is Finish (or early update is triggered)
def assert_possible_child(node):
    """
    Assert that the given node may receive a new parent through the current action.

    The root may never have parents, and the Terminal edge tag must be used
    exactly for terminal children. When Config().args.constraints is set, the
    incoming-tag constraints (unique, childless, multiple parents) are checked too.

    :param node: candidate child node
    """
    tag = action.tag
    assert node is not self.root, "The root may not have parents"
    is_terminal = node.text is not None
    assert is_terminal == (tag == EdgeTags.Terminal), \
        "Edge tag must be %s iff child is terminal, but node is %s and edge tag is %s" % (
            EdgeTags.Terminal, node, tag)
    if Config().args.constraints:
        assert tag not in Constraints.UniqueIncoming or \
            tag not in node.incoming_tags, \
            "Incoming edge tag %s must be unique, but %s already has one" % (
                tag, node)
        assert tag not in Constraints.ChildlessIncoming or \
            node.outgoing_tags <= Constraints.ChildlessOutgoing, \
            "Units with incoming %s edges may not have children, but %s has %d" % (
                Constraints.ChildlessIncoming, node, len(node.children))
        assert action.remote or tag in Constraints.possible_multiple_incoming() or \
            all(e.remote or e.tag in Constraints.possible_multiple_incoming() for e in node.incoming), \
            "Multiple parents only allowed if they are remote or linkage edges: %s, %s" % (
                action, node)
def __init__(self, *args, model=None, epoch=0):
    """
    Create a new untrained Perceptron or copy the weights from an existing one.

    The minimum number of updates for a feature to be used in scoring is read
    from Config().args.min_update.

    :param args: positional arguments forwarded to the base class after the model type
    :param model: if given, copy the weights (from a trained model)
    :param epoch: forwarded to the base class
    """
    super(SparsePerceptron, self).__init__(SPARSE, *args, model=model, epoch=epoch)
    # Unseen features get their weights from self.create_weights
    weights = defaultdict(self.create_weights)
    if self.is_frozen:
        weights.update(self.model)
    self.model = weights
    self.input_dim = len(weights)
    self.min_update = Config().args.min_update  # Minimum number of updates for a feature to be used in scoring
    self.dropped = set()  # Features that did not get min_updates after a full epoch
def transition(self, action):
    """
    Main part of the parser: apply action given by oracle or classifier.

    The state's log is reset, the stack/buffer/nodes are modified according to
    the action type, and the action is appended to self.actions with its index set.

    :param action: Action object to apply
    :raises Exception: if the action is of none of the known types
    """
    action.apply()
    self.log = []
    if action.is_type(Actions.Shift):  # Push buffer head to stack; shift buffer
        self.stack.append(self.buffer.popleft())
    elif action.is_type(Actions.Node):  # Create new parent node and add to the buffer
        parent = self.add_node(action.orig_node)
        self.update_swap_index(parent)
        self.add_edge(Edge(parent, self.stack[-1], action.tag))
        self.buffer.appendleft(parent)
    elif action.is_type(Actions.Implicit):  # Create new child node and add to the buffer
        child = self.add_node(action.orig_node, implicit=True)
        self.update_swap_index(child)
        self.add_edge(Edge(self.stack[-1], child, action.tag))
        self.buffer.appendleft(child)
    elif action.is_type(Actions.Reduce):  # Pop stack (no more edges to create with this node)
        self.stack.pop()
    elif action.is_type(Actions.LeftEdge, Actions.LeftRemote, Actions.RightEdge, Actions.RightRemote):
        parent, child = self.get_parent_child(action)
        self.add_edge(Edge(parent, child, action.tag, remote=action.remote))
    elif action.is_type(Actions.Swap):  # Place second (or more) stack item back on the buffer
        distance = action.tag or 1
        # All items below the stack top, up to `distance` of them
        s = slice(-distance - 1, -1)
        self.log.append("%s <--> %s" % (", ".join(map(str, self.stack[s])), self.stack[-1]))
        self.buffer.extendleft(reversed(self.stack[s]))  # extendleft reverses the order
        del self.stack[s]
    elif action.is_type(Actions.Finish):  # Nothing left to do
        self.finished = True
    else:
        # Convert explicitly: concatenating a str with an Action object raised
        # TypeError and masked the intended error message.
        raise Exception("Invalid action: " + str(action))
    if Config().args.verify:
        intersection = set(self.stack).intersection(self.buffer)
        assert not intersection, "Stack and buffer overlap: %s" % intersection
    self.assert_node_ratio()
    action.index = len(self.actions)
    self.actions.append(action)
def config():
    """
    Build a Config from the arguments ("", "-m", "test") with node labels
    disabled, evaluation enabled, a minibatch size of 50 and a shared layer
    dimension of 50.

    :return: the configured Config object
    """
    overrides = {
        "no_node_labels": True,
        "evaluate": True,
        "minibatch_size": 50,
    }
    cfg = Config("", "-m", "test")
    cfg.update(overrides)
    cfg.update_hyperparams(shared={"layer_dim": 50})
    return cfg