Exemple #1
0
 def __init__(self, model_files=(), config=None, beam=1):
     """
     Create a parser backed by one Model per given model file
     :param model_files: a single filename (str) or an iterable of filenames; when empty,
                         the classifier name from the configuration (args.classifier) is used
     :param config: configuration object; when not given, Config() is used
     :param beam: beam size (currently unused)
     """
     # Resolve the configuration before it is needed for the model-file fallback:
     # previously config=None with no model files dereferenced None.args
     config = config or Config()
     if isinstance(model_files, str):
         model_files = (model_files,)
     super().__init__(config=config,
                      models=list(map(Model, model_files or (config.args.classifier,))))
     self.beam = beam  # Currently unused
     self.best_score = self.dev = self.test = self.iteration = self.epoch = self.batch = None
     self.trained = self.save_init = False
     self.accuracies = {}
Exemple #2
0
def average_f1(scores, eval_type=None):
    """
    Return the average F1 of the given scores for the first evaluation type that works
    :param scores: object providing average_f1(eval_type)
    :param eval_type: evaluation type to try first; if falsy, get_eval_type(scores) is used
    :return: the first average F1 computed without a ValueError, or 0 if every type fails
    """
    candidates = (eval_type or get_eval_type(scores),) + EVAL_TYPES
    for candidate in candidates:
        try:
            result = scores.average_f1(candidate)
        except ValueError as error:
            # Report the failure and fall through to the next evaluation type
            Config().print([candidate, str(error)], level=0)
        else:
            return result
    return 0
Exemple #3
0
 def load_extra(self, d):
     """
     Restore network dimensions from a saved dict, mirroring them on self and on Config().args
     :param d: dict of saved values; "lstm_layers" is required, the other keys
               fall back to the value currently held by Config().args
     """
     layers = d["lstm_layers"]
     Config().args.lstm_layers = layers
     self.lstm_layers = layers
     optional_keys = ("lstm_layer_dim", "embedding_layers", "embedding_layer_dim", "max_length")
     for key in optional_keys:
         value = d.get(key, getattr(Config().args, key))
         setattr(Config().args, key, value)
         setattr(self, key, value)
Exemple #4
0
 def add(self):
     """
     Check that this edge is legal, then register it on its parent (as outgoing)
     and on its child (as incoming). The duplicate and cycle checks only run
     when Config().args.verify is set.
     """
     assert self.tag is not None, "No tag given for new edge %s -> %s" % (self.parent, self.child)
     source, target = self.parent, self.child
     assert source is not target, "Trying to create self-loop edge on %s" % source
     if Config().args.verify:
         assert self not in source.outgoing, "Trying to create outgoing edge twice: %s" % self
         assert self not in target.incoming, "Trying to create incoming edge twice: %s" % self
         assert source not in target.descendants, "Detected cycle created by edge: %s" % self
     source.add_outgoing(self)
     target.add_incoming(self)
Exemple #5
0
def read_passages(args, files):
    """
    Expand glob patterns and read the resulting files/directories as passages
    :param args: object with `sentences` and `paragraphs` attributes, forwarded to the reader
    :param files: iterable of filenames or glob patterns
    :return: whatever ioutil.read_files_and_dirs returns for the expanded list
    """
    expanded = []
    for pattern in files:
        matches = sorted(glob(pattern))
        # A pattern that matches nothing is passed through verbatim
        expanded.extend(matches or (pattern,))
    return ioutil.read_files_and_dirs(
        expanded,
        sentences=args.sentences,
        paragraphs=args.paragraphs,
        converters=CONVERTERS,
        lang=Config().args.lang,
    )
def get_ucca_passage(sentence):
    """
    Parse a single sentence with the module-level PARSER and return the resulting passage
    :param sentence: text to parse; it is written (plus a newline) to a temporary file
    :return: the single passage produced by PARSER.parse
    :raises AssertionError: if parsing does not yield exactly one passage
    """
    # TemporaryDirectory removes the scratch directory on exit; the previous mkdtemp()
    # call left one directory behind per invocation. Parsing happens inside the block
    # so the source file still exists while it is being read.
    with tempfile.TemporaryDirectory() as temp_dir:
        source_path = Path(temp_dir) / 'source.txt'
        with source_path.open('w') as f:
            f.write(sentence + '\n')
        argv = ['script_name', str(source_path)]
        # Reload the configuration as if the script had been called with the file as argument
        with unittest.mock.patch('sys.argv', argv):
            Config.reload()
            args = Config().args
        # Only the passages to parse are needed; train/dev were read before but never used
        test_passages = read_passages(args, args.passages)
        ucca_passages = [
            ucca_passage for (ucca_passage, ) in PARSER.parse(
                test_passages, evaluate=[], display=False, write=False)
        ]
    assert len(ucca_passages) == 1
    return ucca_passages[0]
Exemple #7
0
def main_generator():
    """
    Run training/testing according to Config().args, either as a single
    train/dev/test run or as cross-validation over args.folds folds
    :return: generator of scores: everything yielded by train_test for a simple run,
             or one aggregated Scores object for a cross-validation run
    """
    args = Config().args
    assert args.passages or args.train, "Either passages or --train is required (use -h for help)"
    assert args.models or args.train or args.folds, "Either --model or --train or --folds is required"
    assert not ((args.train or args.dev) and args.folds), "--train and --dev are incompatible with --folds"
    assert args.train or not args.dev, "--dev is only possible together with --train"

    if not args.folds:  # Simple train/dev/test by given arguments
        train_passages, dev_passages, test_passages = (
            read_passages(args, paths) for paths in (args.train, args.dev, args.passages))
        yield from train_test(train_passages, dev_passages, test_passages, args)
        return

    # Cross-validation: split the shuffled passages into interleaved folds
    all_passages = list(read_passages(args, args.passages))
    assert len(all_passages) >= args.folds, \
        "%d folds are not possible with only %d passages" % (args.folds, len(all_passages))
    Config().random.shuffle(all_passages)
    folds = [all_passages[offset::args.folds] for offset in range(args.folds)]
    fold_scores = []
    for index, dev_passages in enumerate(folds):
        print("Fold %d of %d:" % (index + 1, args.folds))
        test_passages = folds[(index + 1) % args.folds]
        # Train on every fold that serves neither as dev nor as test in this round
        train_passages = []
        for fold in folds:
            if fold is dev_passages or fold is test_passages:
                continue
            train_passages.extend(fold)
        results = list(train_test(train_passages, dev_passages, test_passages, args, "_%d" % index))
        # The last item yielded by train_test is kept as this fold's score
        if results and results[-1] is not None:
            fold_scores.append(results[-1])
    if fold_scores:
        scores = Scores(fold_scores)
        per_fold = ", ".join("%.3f" % average_f1(fold_score) for fold_score in fold_scores)
        print("Average test F1 score for each fold: " + per_fold)
        print("Aggregated scores across folds:\n")
        scores.print()
        yield scores
Exemple #8
0
 def __init__(self, passage):
     """
     Collect the node IDs and edges of the passage that are still to be handled
     :param passage: passage providing layer(), and nodes whose iteration yields edges
     Linkage, implicit and remote elements are only included when the
     corresponding Config().args option (linkage / implicit / remote) is set.
     """
     l1 = passage.layer(layer1.LAYER_ID)

     nodes_remaining = set()
     for node in l1.all:
         if node is l1.heads[0]:
             continue  # the first head of layer 1 is never included
         if not (Config().args.linkage or node.tag != layer1.NodeTags.Linkage):
             continue
         if not (Config().args.implicit or not node.attrib.get("implicit")):
             continue
         nodes_remaining.add(node.ID)
     self.nodes_remaining = nodes_remaining

     edges_remaining = set()
     for node in passage.nodes.values():
         for edge in node:
             if not (Config().args.linkage or edge.tag not in
                     (layer1.EdgeTags.LinkRelation, layer1.EdgeTags.LinkArgument)):
                 continue
             if not (Config().args.implicit or not edge.child.attrib.get("implicit")):
                 continue
             if not (Config().args.remote or not edge.attrib.get("remote")):
                 continue
             edges_remaining.add(edge)
     self.edges_remaining = edges_remaining

     self.passage = passage
     self.edge_found = False
     self.log = None
Exemple #9
0
def main():
    """
    Random hyperparameter search: sample parameter combinations, run each one,
    and append its scores to a CSV file.

    Environment variables read:
      PARAMS_FILE     output CSV filename (default "params.csv")
      PARAMS_NUM      number of combinations to sample (default 30)
      WORD_VECTORS*   every variable whose name starts with this prefix
                      contributes its value as a word vectors file option
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(MODELS_DIR, exist_ok=True)
    Config().args.no_write = True
    out_file = os.environ.get("PARAMS_FILE", "params.csv")
    word_vectors_files = [os.environ[f] for f in os.environ if f.startswith("WORD_VECTORS")]
    num = int(os.environ.get("PARAMS_NUM", 30))
    np.random.seed()
    # Each entry is (parameter name, domain to sample from); repeated values
    # in a domain make that value proportionally more likely to be drawn
    domains = (
        ("seed",                    2147483647),  # max value for int
        ("classifier",              (config.MLP_NN, config.BILSTM_NN)),
        ("update_word_vectors",     [True, False]),
        ("word_vectors",            [None] + word_vectors_files),
        ("word_dim_external",       (0, 300)),
        ("word_dim",                (0, 50, 100, 200, 300)),
        ("tag_dim",                 (5, 10, 20)),
        ("dep_dim",                 (5, 10, 20)),
        ("label_dim",               (5, 10, 20)),
        ("punct_dim",               (1, 2, 3)),
        ("gap_dim",                 (1, 2, 3)),
        ("action_dim",              (3, 5, 10)),
        ("layer_dim",               (50, 100, 200, 300, 500, 1000)),
        ("layers",                  [1] + 5 * [2]),
        ("lstm_layer_dim",          (50, 100, 200, 300, 500, 1000)),
        ("lstm_layers",             [1] + 5 * [2]),
        ("embedding_layer_dim",     (50, 100, 200, 300, 500, 1000)),
        ("embedding_layers",        5 * [1] + [2]),
        ("activation",              config.ACTIVATIONS),
        ("init",                    5 * [config.INITIALIZATIONS[0]] + list(config.INITIALIZATIONS)),
        ("batch_size",              (10, 30, 50, 100, 200, 500)),
        ("minibatch_size",          (50, 100, 200, 300, 500, 1000)),
        ("optimizer",               5 * [config.OPTIMIZERS[0]] + list(config.OPTIMIZERS)),
        ("swap_importance",         (1, 2)),
        ("iterations",              range(1, 51)),
        ("word_dropout",            (0, .1, .2, .25, .3)),
        ("word_dropout_external",   (0, .1, .2, .25, .3)),
        ("dynet_weight_decay",      (1e-7, 1e-6, 1e-5, 1e-4)),
        ("dropout",                 (0, .1, .2, .3, .4, .5)),
    )
    # Draw `num` values per parameter, then zip the columns into `num` combinations;
    # NumPy scalars are converted back to plain Python values with .item()
    params = [Params(OrderedDict(p))
              for p in zip(*[[(n, v.item() if hasattr(v, "item") else v)
                              for v in np.random.choice(vs, num)]
                             for n, vs in domains])]
    print("All parameter combinations to try:")
    print("\n".join(map(str, params)))
    print("Saving results to '%s'" % out_file)
    # newline="" is what the csv module requires of files it writes to;
    # without it, rows can be followed by blank lines on some platforms
    with open(out_file, "w", newline="") as f:
        csv.writer(f).writerow(params[0].get_field_titles())
    for param in params:
        param.run()
        # Re-open in append mode per run so results survive a crash in a later run
        with open(out_file, "a", newline="") as f:
            param.write_scores(csv.writer(f))
        best = max(params, key=Params.score)
        print("Best parameters: %s" % best)
Exemple #10
0
 def add_node(self, *args, **kwargs):
     """
     Create a new Node (not core.Node) during parsing, append it to the
     temporary representation and record it in the log
     :param args: positional arguments passed on to Node() after the node index
     :param kwargs: keyword arguments passed on to Node()
     :return: the newly created node
     """
     next_index = len(self.nodes)
     created = Node(next_index, *args, **kwargs)
     if Config().args.verify:
         assert created not in self.nodes, "Node already exists"
     self.nodes.append(created)
     self.log.append("node: %s" % created)
     return created
 def set_node(self, state, node_dropout=0):
     """
     Resolve self.node from the given state according to self.source, self.index and self.relatives
     :param state: object with `stack`, `buffer` and `actions` sequences, or None
     :param node_dropout: if truthy, probability threshold compared against a random
                          sample to leave the node unset
     self.node stays None when state is None or dropout triggers. If resolution fails
     part-way, the node reached so far is kept unless missing_node_features is set
     (or dropout triggers), in which case it is reset to None.
     """
     self.node = None
     if state is None:
         return
     if node_dropout and node_dropout > Config().random.random_sample():
         return
     try:
         # Starting point: stack ("s") and actions (otherwise) count from the end, buffer ("b") from the start
         if self.source == "s":
             self.node = state.stack[-1 - self.index]
         elif self.source == "b":
             self.node = state.buffer[self.index]
         else:  # source == "a"
             self.node = state.actions[-1 - self.index]
         # Follow relatives: uppercase letters go to parents, lowercase to children
         for relative in self.relatives:
             candidates = self.node.parents if relative.isupper() else self.node.children
             if relative.lower() != "r":  # relative.lower() == "l"
                 self.node = candidates[0]
             elif len(candidates) == 1:
                 raise ValueError("Avoiding identical right and left relatives")
             else:
                 self.node = candidates[-1]
     except (IndexError, TypeError, AttributeError, ValueError):
         if Config().args.missing_node_features or (
                 node_dropout and node_dropout > Config().random.random_sample()):
             self.node = None
Exemple #12
0
 def assert_possible_edge():
     """
     Assert that the edge implied by `action` may be created: both endpoints are
     acceptable, root constraints hold (when enabled), and the edge neither
     duplicates an existing one nor closes a cycle.
     Uses `self` and `action` from the enclosing scope.
     """
     parent, child = self.get_parent_child(action)
     assert_possible_parent(parent)
     assert_possible_child(child)
     check_root = parent is self.root and Config().args.constraints
     if check_root:
         assert child.text is None, "Root may not have terminal children, but is being added '%s'" % child
         assert action.tag in Constraints.TopLevel, "The root may not have %s edges" % action.tag
     # The multiple_edges option was removed, so a parent-child pair may only be connected once
     assert child not in parent.children, "Edge must not already exist: %s->%s" % (parent, child)
     assert parent not in child.descendants, "Detected cycle created by edge: %s->%s" % (parent, child)
Exemple #13
0
 def score(self, features):
     """
     Compute a score per label for the given features
     :param features: extracted feature values, of size input_size
     :return: array with one score per label; all zeros before the first update
     """
     super(NeuralNetwork, self).score(features)
     if self._iteration > 0:
         all_scores = self.evaluate(features).npvalue()
         return all_scores[:self.num_labels]
     # Nothing has been learned yet
     if Config().args.verbose >= 2:
         print("  no updates done yet, returning zero vector.")
     return np.zeros(self.num_labels)
Exemple #14
0
 def __init__(self, model_type, filename, labels, input_params=None, model=None):
     """
     Set up the common classifier state
     :param model_type: stored as-is in self.model_type
     :param filename: stored as-is in self.filename
     :param labels: tuple of lists of labels that can be updated later to add new labels
     :param input_params: dict of feature type name -> FeatureInformation
     :param model: if given, copy the weights (from a trained model); the classifier
                   is then marked as frozen
     """
     self.model = None
     self.model_type, self.filename = model_type, filename
     self.labels = tuple(labels)
     self.input_params = input_params
     # Snapshot of num_labels at construction time
     self._num_labels = self.num_labels
     self.input_dim = None
     self.is_frozen = model is not None
     self.updates = self.epoch = 0
     # Learning-rate settings are taken from the global configuration
     self.learning_rate = Config().args.learning_rate
     self.learning_rate_decay = Config().args.learning_rate_decay
Exemple #15
0
 def update(self, features, pred, true, importance=1):
     """
     Record loss terms for the true label so that weights can be updated later
     :param features: extracted feature values, of size input_size
     :param pred: label predicted by the classifier (non-negative integer less than num_labels)
     :param true: true label (non-negative integer less than num_labels)
     :param importance: add this many samples with the same features
     """
     super(NeuralNetwork, self).update(features, pred, true, importance)
     repetitions = int(importance)
     for _ in range(repetitions):
         output = self.evaluate(features, train=True)
         self._losses.append(dy.pick(output, true))
         if Config().args.dynet_viz:
             # Visualization mode: print the graph once and terminate the process
             dy.print_graphviz()
             sys.exit(0)
Exemple #16
0
 def init_data(self, param):
     """
     Initialize the lookup data of a non-numeric feature parameter, in place
     :param param: feature parameters object; left untouched if it already has
                   data or is a NumericFeatureParameters
     """
     already_set = param.data is not None
     if already_set or isinstance(param, NumericFeatureParameters):
         return
     param.num = self.feature_extractor.num_features_non_numeric(param.effective_suffix)
     if param.dim:
         if param.external:
             # Externally loaded vectors; unknown keys map to a zero vector
             param.data = UnknownDict(self.get_word_vectors(param), np.zeros(param.dim))
         else:
             def random_vector(d=param.dim):
                 return Config().random.normal(size=d)

             param.data = defaultdict(random_vector)
             param.data[UnknownDict.UNKNOWN]  # Initialize unknown value
     param.empty = np.zeros(param.dim, dtype=float)
Exemple #17
0
 def assert_possible_parent(node):
     """
     Assert that the given node may receive a new outgoing edge for `action`
     (`action` is taken from the enclosing scope)
     :param node: candidate parent node
     :raises AssertionError: if the node is a terminal or implicit, or (when
                             Config().args.constraints is set) a tag constraint is violated
     """
     assert node.text is None, "Terminals may not have children: %s" % node.text
     # Report the node actually being checked (the message previously formatted `s0`)
     assert not node.implicit, "Implicit nodes may not have children: %s" % node
     if Config().args.constraints:
         assert action.tag not in Constraints.UniqueOutgoing or action.tag not in node.outgoing_tags, \
             "Outgoing edge tag %s must be unique, but %s already has one" % (
                 action.tag, node)
         assert action.tag not in Constraints.MutuallyExclusiveOutgoing or not \
             node.outgoing_tags & Constraints.MutuallyExclusiveOutgoing, \
             "Outgoing edge tags %s are mutually exclusive, but %s already has %s and is being added %s" % (
                 Constraints.MutuallyExclusiveOutgoing, node, node.outgoing_tags, action.tag)
         assert action.tag in Constraints.ChildlessOutgoing or not \
             node.incoming_tags & Constraints.ChildlessIncoming, \
             "Units with incoming %s edges may not have children, and %s has incoming %s" % (
                 Constraints.ChildlessIncoming, node, node.incoming_tags)
Exemple #18
0
 def score(self, features, axis):
     """
     Compute a score per label on the given axis
     :param features: extracted feature values, of size input_size
     :param axis: axis of the label we are predicting
     :return: array with one score per label of that axis; all zeros before the first update
     """
     super(NeuralNetwork, self).score(features, axis)
     label_count = self.num_labels[axis]
     if self.updates > 0:
         all_scores = self.evaluate(features, axis).npvalue()
         return all_scores[:label_count]
     # Nothing has been learned yet
     if Config().args.verbose > 2:
         print("  no updates done yet, returning zero vector.")
     return np.zeros(label_count)
Exemple #19
0
 def finalize(self, finished_epoch=False):
     """
     Apply the losses collected so far in a single trainer update
     :param finished_epoch: if true, also notify the trainer that an epoch has ended
     :return: self
     """
     super(NeuralNetwork, self).finalize()
     if self.model is None:
         self.init_model()
     pending = self._losses
     if pending:
         # Collected terms are negated and summed into one loss expression
         total_loss = -dy.esum(pending)
         total_loss.forward()
         if Config().args.verbose >= 2:
             print("Total loss from %d time steps: %g" %
                   (len(pending), total_loss.value()))
         total_loss.backward()
         self._trainer.update()
         # Start over with a fresh computation graph and an empty loss list
         self.init_cg()
         self._losses = []
         self._iteration += 1
     if finished_epoch:
         self._trainer.update_epoch()
     if Config().args.verbose:
         self._trainer.status()
     return self
Exemple #20
0
 def __init__(self, model_file=None, model_type=None, beam=1):
     """
     Set up parser state, counters and the underlying model
     :param model_file: passed to Model as its second argument
     :param model_type: passed to Model as its first argument
     :param beam: beam size (currently unused)
     """
     self.args = Config().args
     self.state = None  # State object created at each parse
     self.oracle = None  # Oracle object created at each parse
     # Action counters
     self.action_count = 0
     self.correct_action_count = 0
     self.total_actions = 0
     self.total_correct_actions = 0
     # Label counters
     self.label_count = 0
     self.correct_label_count = 0
     self.total_labels = 0
     self.total_correct_labels = 0
     self.model = Model(model_type, model_file)
     classifier_properties = self.model.model.get_classifier_properties()
     self.update_only_on_error = ClassifierProperty.update_only_on_error in classifier_properties
     self.beam = beam  # Currently unused
     self.state_hash_history = None  # For loop checking
     # Used in verify_passage to optionally ignore a mismatch in linkage nodes:
     if self.args.linkage:
         self.ignore_node = None
     else:
         self.ignore_node = lambda n: n.tag == layer1.NodeTags.Linkage
     self.best_score = None
     self.dev = None
     self.iteration = None
     self.eval_index = None
     self.trained = False
Exemple #21
0
 def run(self, out_file):
     """
     Apply this parameter combination to the configuration, run the parser,
     and append one CSV row per yielded score to out_file
     :param out_file: CSV filename; a title row is written first if the file does not exist yet
     :raises AssertionError: if the current configuration lacks the arguments needed to run
     """
     assert Config().args.train and (Config().args.passages or Config().args.dev) or \
            Config().args.passages and Config().args.folds, "insufficient parameters given to parser"
     print("Running with %s" % self)
     Config().update(self.params)
     Config().update_hyperparams(**self.hyperparams)
     for i, self.scores in enumerate(parse.main_generator(), start=1):
         print_title = not os.path.exists(out_file)
         # newline="" is what the csv module requires of files it writes to;
         # without it, rows can be followed by blank lines on some platforms
         with open(out_file, "a", newline="") as f:
             writer = csv.writer(f)
             if print_title:
                 writer.writerow(list(self.all_params.keys()) +
                                 ["average_labeled_f1"] + self.scores.titles())
             # The "iterations" column holds the index of the current result, not the configured total
             writer.writerow([str(i if n == "iterations" else p) for n, p in self.all_params.items()] +
                             [str(self.scores.average_f1())] + self.scores.fields())
Exemple #22
0
 def update(self, features, axis, pred, true, importance=1):
     """
     Record loss terms for the true label so that weights can be updated later
     :param features: extracted feature values, in the form of a dict (name: value)
     :param axis: axis of the label we are predicting
     :param pred: label predicted by the classifier (non-negative integer bounded by num_labels[axis])
     :param true: true label (non-negative integer bounded by num_labels[axis])
     :param importance: add this many samples with the same features
     """
     super(NeuralNetwork, self).update(features, axis, pred, true, importance)
     repetitions = int(importance)
     for _ in range(repetitions):
         output = self.evaluate(features, axis, train=True)
         self.losses.append(dy.pick(output, true))
         if Config().args.dynet_viz:
             # Visualization mode: print the graph once and terminate the process
             dy.print_graphviz()
             sys.exit(0)
Exemple #23
0
 def __init__(self, model_file=None, model_type=None, beam=1):
     """
     Set up parser state, counters and the underlying model
     :param model_file: passed to Model as its second argument
     :param model_type: passed to Model as its first argument
     :param beam: beam size (currently unused)
     """
     self.state = None  # State object created at each parse
     self.oracle = None  # Oracle object created at each parse
     self.scores = None  # NumPy array of action scores at each action
     self.action_count = self.correct_count = 0
     self.total_actions = self.total_correct = 0
     self.model = Model(model_type, model_file, Actions().all)
     self.beam = beam  # Currently unused
     self.state_hash_history = None  # For loop checking
     # Used in verify_passage to optionally ignore a mismatch in linkage nodes:
     if Config().args.linkage:
         self.ignore_node = None
     else:
         self.ignore_node = lambda n: n.tag == layer1.NodeTags.Linkage
     self.best_score = None
     self.dev = None
     self.iteration = None
     self.eval_index = None
     self.dev_scores = []
     self.trained = False
Exemple #24
0
 def __init__(self, *args, model=None, epoch=0):
     """
     Create a new untrained Perceptron or copy the weights from an existing one
     :param args: positional arguments forwarded to the base class after the model type
     :param model: if given, copy the weights (from a trained model)
     :param epoch: forwarded to the base class
     """
     super(SparsePerceptron, self).__init__(SPARSE_PERCEPTRON, *args, model=model, epoch=epoch)
     # Every unseen feature gets a fresh FeatureWeights on first access
     weights = defaultdict(lambda: FeatureWeights(self.num_labels))
     if self.is_frozen:
         weights.update(self.model)
     self.model = weights
     # Minimum number of updates for a feature to be used in scoring
     self._min_update = Config().args.min_update
Exemple #25
0
def create_config():
    """
    Build a Config("", "-m", "test") with a fixed set of small argument
    values and hyperparameters applied
    :return: the updated Config object
    """
    c = Config("", "-m", "test")
    settings = {
        "verbose": 2,
        "timeout": 1,
        "embedding_layer_dim": 1,
        "ner_dim": 1,
        "action_dim": 1,
        "lemma_dim": 1,
        "max_words_external": 3,
        "word_dim_external": 1,
        "word_dim": 1,
        "max_words": 3,
        "max_lemmas": 3,
        "max_tags": 3,
        "max_pos": 3,
        "max_deps": 3,
        "max_edge_labels": 3,
        "max_puncts": 3,
        "max_action_types": 3,
        "max_ner_types": 3,
        "edge_label_dim": 1,
        "tag_dim": 1,
        "pos_dim": 1,
        "dep_dim": 1,
        "optimizer": "sgd",
        "output_dim": 1,
        "layer_dim": 2,
        "layers": 3,
        "lstm_layer_dim": 2,
        "lstm_layers": 3,
        "max_action_ratio": 10,
        "update_word_vectors": False,
        "copy_shared": None,
    }
    c.update(settings)
    c.update_hyperparams(
        shared={"lstm_layer_dim": 2, "lstm_layers": 1},
        ucca={"word_dim": 2},
        amr={
            "max_node_labels": 3,
            "max_node_categories": 3,
            "node_label_dim": 1,
            "node_category_dim": 1,
        },
    )
    return c
Exemple #26
0
def train_test(train_passages, dev_passages, test_passages, args, model_suffix=""):
    """
    Train a parser, then parse (and optionally evaluate) the test passages
    :param train_passages: passages to train on
    :param dev_passages: passages handed to training as `dev`
    :param test_passages: passages to parse after training
    :param args: argument namespace (models, classifier, beam, iterations, evaluate, write, ...)
    :param model_suffix: string to append to model filename before file extension
    :return: generator of the truthy results of training, followed by the
             aggregated test Scores if any test scores were produced
    """
    model_files = []
    for path in args.models or (args.classifier,):
        base, ext = os.path.splitext(path)
        model_files.append(base + model_suffix + ext)
    p = Parser(model_files=model_files, config=Config(), beam=args.beam)
    yield from (
        result
        for result in p.train(train_passages,
                              dev=dev_passages,
                              test=test_passages,
                              iterations=args.iterations)
        if result
    )
    if not test_passages:
        return
    if args.train or args.folds:
        print("Evaluating on test passages")
    # Evaluate when requested, or whenever training passages were given
    evaluate = args.evaluate or train_passages
    passage_scores = []
    # Each result is a sequence whose first item is dropped; the rest are scores
    for _, *score in p.parse(test_passages, evaluate=evaluate, write=args.write):
        passage_scores.extend(score)
    if not passage_scores:
        return
    scores = Scores(passage_scores)
    if args.verbose <= 1 or len(passage_scores) > 1:
        print("\nAverage %s F1 score on test: %.3f" %
              (get_eval_type(scores), average_f1(scores)))
        print("Aggregated scores:")
        scores.print()
    print_scores(scores, args.testscores)
    yield scores
Exemple #27
0
 def parse_passage(self, train):
     """
     Internal method to parse a single passage: repeatedly extract features,
     choose an action, apply it to self.state (and choose a node label when the
     state asks for one), until the state reports that it is finished
     :param train: use oracle to train on given passages, or just parse with classifier?
     :raises ParserException: if the chosen action fails an assertion when applied to the state
     """
     if self.args.verbose > 1:
         print("  initial state: %s" % self.state)
     while True:
         if self.args.check_loops:
             self.check_loop()
         # Features are extracted once per step and reused for the label choice below
         features = self.model.feature_extractor.extract_features(
             self.state)
         true_actions = self.get_true_actions(train)
         action, predicted_action = self.choose_action(
             features, train, true_actions)
         try:
             self.state.transition(action)
         except AssertionError as e:
             # Re-raise assertion failures from the transition as a parser error, keeping the cause
             raise ParserException("Invalid transition: %s %s" %
                                   (action, self.state)) from e
         if self.args.verbose > 1:
             if self.oracle:
                 print("  predicted: %-15s true: %-15s taken: %-15s %s" %
                       (predicted_action, "|".join(
                           map(str,
                               true_actions.values())), action, self.state))
             else:
                 print("  action: %-15s %s" % (action, self.state))
         if self.state.need_label:  # Label action that requires a choice of label
             true_label = self.get_true_label(action.orig_node)
             label, predicted_label = self.choose_label(
                 features, train, true_label)
             self.state.label_node(label)
             if self.args.verbose > 1:
                 if self.oracle and not Config().args.use_gold_node_labels:
                     print("  predicted label: %-15s true label: %-15s" %
                           (predicted_label, true_label))
                 else:
                     print("  label: %-15s" % label)
         # Notify the classifier that one parsing step is complete
         self.model.model.finished_step(train)
         if self.args.verbose > 1:
             for line in self.state.log:
                 print("    " + line)
         if self.state.finished:
             return  # action is Finish (or early update is triggered)
Exemple #28
0
 def assert_possible_child(node):
     """
     Assert that the given node may receive a new incoming edge for `action`
     (`self` and `action` are not parameters; presumably they come from an
     enclosing scope that is not visible here)
     :param node: candidate child node
     :raises AssertionError: if any of the checks below fails
     """
     assert node is not self.root, "The root may not have parents"
     # Terminal edge tag and terminal child (one that has text) must go together
     assert (node.text is not None) == (action.tag == EdgeTags.Terminal), \
         "Edge tag must be %s iff child is terminal, but node is %s and edge tag is %s" % (
             EdgeTags.Terminal, node, action.tag)
     # The remaining checks apply only when constraints are enabled in the configuration
     if Config().args.constraints:
         assert action.tag not in Constraints.UniqueIncoming or \
             action.tag not in node.incoming_tags, \
             "Incoming edge tag %s must be unique, but %s already has one" % (
                 action.tag, node)
         assert action.tag not in Constraints.ChildlessIncoming or \
             node.outgoing_tags <= Constraints.ChildlessOutgoing, \
             "Units with incoming %s edges may not have children, but %s has %d" % (
                 Constraints.ChildlessIncoming, node, len(node.children))
         # Either the new edge or every existing incoming edge must be remote
         # or carry a tag from Constraints.possible_multiple_incoming()
         assert action.remote or action.tag in Constraints.possible_multiple_incoming() or \
             all(e.remote or e.tag in Constraints.possible_multiple_incoming()
                 for e in node.incoming), \
             "Multiple parents only allowed if they are remote or linkage edges: %s, %s" % (
                 action, node)
Exemple #29
0
 def __init__(self, *args, model=None, epoch=0):
     """
     Create a new untrained Perceptron or copy the weights from an existing one
     :param args: positional arguments forwarded to the base class after the model type
     :param model: if given, copy the weights (from a trained model)
     :param epoch: forwarded to the base class
     """
     super(SparsePerceptron, self).__init__(SPARSE, *args, model=model, epoch=epoch)
     # Unseen features get their weights from self.create_weights on first access
     weights = defaultdict(self.create_weights)
     if self.is_frozen:
         weights.update(self.model)
     self.model = weights
     self.input_dim = len(self.model)
     # Minimum number of updates for a feature to be used in scoring
     self.min_update = Config().args.min_update
     # Features that did not get min_updates after a full epoch
     self.dropped = set()
Exemple #30
0
 def transition(self, action):
     """
     Main part of the parser: apply action given by oracle or classifier
     :param action: Action object to apply
     :raises Exception: if the action is of none of the known types
     :raises AssertionError: if verification is enabled and the stack and buffer overlap
     The step log (self.log) is reset on every call; the action is given its
     index and appended to self.actions at the end.
     """
     action.apply()
     self.log = []
     if action.is_type(Actions.Shift):  # Push buffer head to stack; shift buffer
         self.stack.append(self.buffer.popleft())
     elif action.is_type(Actions.Node):  # Create new parent node and add to the buffer
         parent = self.add_node(action.orig_node)
         self.update_swap_index(parent)
         self.add_edge(Edge(parent, self.stack[-1], action.tag))
         self.buffer.appendleft(parent)
     elif action.is_type(Actions.Implicit):  # Create new child node and add to the buffer
         child = self.add_node(action.orig_node, implicit=True)
         self.update_swap_index(child)
         self.add_edge(Edge(self.stack[-1], child, action.tag))
         self.buffer.appendleft(child)
     elif action.is_type(Actions.Reduce):  # Pop stack (no more edges to create with this node)
         self.stack.pop()
     elif action.is_type(Actions.LeftEdge, Actions.LeftRemote, Actions.RightEdge, Actions.RightRemote):
         parent, child = self.get_parent_child(action)
         self.add_edge(Edge(parent, child, action.tag, remote=action.remote))
     elif action.is_type(Actions.Swap):  # Place second (or more) stack item back on the buffer
         distance = action.tag or 1
         s = slice(-distance - 1, -1)
         self.log.append("%s <--> %s" % (", ".join(map(str, self.stack[s])), self.stack[-1]))
         self.buffer.extendleft(reversed(self.stack[s]))  # extendleft reverses the order
         del self.stack[s]
     elif action.is_type(Actions.Finish):  # Nothing left to do
         self.finished = True
     else:
         # Convert explicitly: concatenating the action object itself to a str
         # fails with TypeError and hides the intended message
         raise Exception("Invalid action: " + str(action))
     if Config().args.verify:
         intersection = set(self.stack).intersection(self.buffer)
         assert not intersection, "Stack and buffer overlap: %s" % intersection
     self.assert_node_ratio()
     action.index = len(self.actions)
     self.actions.append(action)
Exemple #31
0
def config():
    """
    Build a Config("", "-m", "test") with a few fixed arguments and a
    shared layer_dim hyperparameter applied
    :return: the updated Config object
    """
    c = Config("", "-m", "test")
    settings = {
        "no_node_labels": True,
        "evaluate": True,
        "minibatch_size": 50,
    }
    c.update(settings)
    c.update_hyperparams(shared={"layer_dim": 50})
    return c