Example #1
    # Requires `from collections import defaultdict` and a `Triple(source,
    # relation, target)` type in scope.
    @classmethod
    def from_lists(cls, all_list):
        head_tags = all_list['head_tags']
        head_indices = all_list['head_indices']
        tgt_tokens = all_list['tokens']

        # Coreference: token i reuses the variable of token tgt_copy_indices[i].
        tgt_copy_indices = all_list['coref']
        variables = []
        variables_count = defaultdict(int)
        for i, token in enumerate(tgt_tokens):
            if tgt_copy_indices[i] != i:
                variables.append(variables[tgt_copy_indices[i]])
            else:
                # Fresh variable from the token's first letter, suffixed with
                # a counter on repeats: 'w', 'w1', 'w2', ...
                if token[0] in variables_count:
                    variables.append(token[0] + str(variables_count[token[0]]))
                else:
                    variables.append(token[0])
                variables_count[token[0]] += 1

        triples = []
        for i, (variable, token) in enumerate(zip(variables, tgt_tokens)):
            triples.append(Triple(variable, "instance", token))
            # Edge from the head's variable to this token's variable; assumes
            # head_indices and head_tags are position-aligned lists.
            triples.append(
                Triple(
                    variables[head_indices[i]],
                    head_tags[i],
                    variable
                )
            )
        # Assumed completion: build the instance from the collected triples.
        return cls(triples)
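
A minimal usage sketch for the method above, assuming it sits on a class named AMR (the enclosing class is not shown in the excerpt) and that all four lists are position-aligned:

all_list = {
    'tokens': ['want', 'boy', 'go', 'boy'],
    'coref': [0, 1, 2, 1],           # token 3 corefers with token 1
    'head_indices': [0, 0, 0, 2],    # position of each token's head
    'head_tags': ['root', 'ARG0', 'ARG1', 'ARG0'],
}
amr = AMR.from_lists(all_list)  # 'AMR' is an assumed class name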
Example #2
    def update_edge_label(self, x, y, old, new):
        # Relabel the x -> y edge in the backing graph.
        self._G[x][y]['label'] = new
        # Triples are immutable, so rebuild the matching one with the new
        # relation and keep the rest unchanged.
        triples = []
        for t in self._triples:
            if t.source == x.identifier and t.target == y.identifier and t.relation == old:
                t = Triple(x.identifier, new, y.identifier)
            triples.append(t)
        self._update_penman_graph(triples)
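
The loop above rebuilds the matching Triple rather than assigning to t.relation because triples of this kind are typically immutable namedtuples. A self-contained sketch of that constraint (the Triple definition here is an assumption, not necessarily the library's):

from collections import namedtuple

Triple = namedtuple('Triple', ['source', 'relation', 'target'])

t = Triple('w', ':ARG0', 'b')
# t.relation = ':ARG1'  # would raise AttributeError: can't set attribute
t = Triple(t.source, ':ARG1', t.target)  # rebuild instead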
Example #3
def _remove_wiki(graph):
    """Replace every :wiki value with the placeholder '+', keeping metadata."""
    metadata = graph.metadata
    triples = []
    for t in graph.triples:
        v1, rel, v2 = t
        if rel == ':wiki':
            t = Triple(v1, rel, '+')
        triples.append(t)
    graph = Graph(triples)
    graph.metadata = metadata
    return graph
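
A round-trip sketch, assuming the modern penman API (penman>=1.0, where Graph and Triple live in penman.graph); the snippet above may target an older release:

import penman
from penman.graph import Graph, Triple

g = penman.decode('(c / city :wiki "Q90" :name (n / name :op1 "Paris"))')
stripped = _remove_wiki(g)
print(penman.encode(stripped))  # the "Q90" value is now the placeholder +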
Example #4
    def add_node(self, instance):
        # Derive a variable name from the concept's first letter.
        identifier = instance[0]
        assert identifier.isalpha()
        # Avoid collisions with existing variables: 'b', then 'b2', 'b3', ...
        if identifier in self.variables():
            i = 2
            while identifier + str(i) in self.variables():
                i += 1
            identifier += str(i)
        triples = self._triples + [Triple(identifier, 'instance', instance)]
        self._triples = penman.alphanum_order(triples)

        node = AMRNode(identifier, [('instance', instance)])
        self._G.add_node(node)
        return node
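
The collision-handling rule above is easy to test in isolation; this standalone sketch reproduces just that logic (the function name and the taken set are illustrative):

def unique_identifier(identifier, taken):
    # First clash gets suffix 2, then 3, ...: 'b', 'b2', 'b3'.
    if identifier in taken:
        i = 2
        while identifier + str(i) in taken:
            i += 1
        identifier += str(i)
    return identifier

print(unique_identifier('b', {'b', 'b2'}))  # -> b3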
Example #5
def anonymize_graph(g):
    """Anonymize graph by replacing nodes of certain named types with tokens like "named0".

    Modifies original graph. (Gotcha: accesses private member var)

    Returns dict that can be used to recover the original values.

    """
    replacements = []
    id_counters = {}
    carg_triples = g.attributes(relation='carg')
    # anonymize each instance that has a carg value, storing the mapping from value to token
    for carg_triple in carg_triples:
        named_triple = g.triples(
            relation='instance', source=carg_triple.source)[0]  # assumes exactly 1
        named_type = named_triple.target.replace("_", "")  # _ causes tokenization issues
        value = carg_triple.target.strip('"')
        # extract char location of the word in original (untokenized) sentence
        span_triple = g.triples(relation="lnk", source=carg_triple.source)[0]
        span = [int(pos) for pos in span_triple.target[2:-2].split(':')]  # '"<5:10>"'
        # create data struct to store mapping of this type and create an id counter
        if named_type not in id_counters:
            id_counters[named_type] = 0
        # generate anonymized token and store it with the span it should replace
        placeholder = '{}{}'.format(named_type, id_counters[named_type])
        replacements.append({'ph': placeholder, 'span': span, 'value': value})
        id_counters[named_type] += 1
        new_triple = Triple(
            named_triple.source,
            named_triple.relation,
            placeholder,
            inverted=named_triple.inverted
        )
        # gotcha: accessing private member var
        g._triples.insert(g._triples.index(named_triple), new_triple)
        g._triples.remove(named_triple)
        g._triples.remove(carg_triple)
    return replacements
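
Each entry of the returned list pairs a placeholder with the character span it covers in the untokenized sentence. A hypothetical companion step (not part of the source) could apply those replacements to the surface string:

def anonymize_sentence(sentence, replacements):
    # Apply right-to-left so earlier character offsets stay valid.
    for r in sorted(replacements, key=lambda r: r['span'][0], reverse=True):
        start, end = r['span']
        sentence = sentence[:start] + r['ph'] + sentence[end:]
    return sentence

# anonymize_sentence('I met Kim.', [{'ph': 'named0', 'span': [6, 9], 'value': 'Kim'}])
# -> 'I met named0.'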
Example #6
def combine_attributes(g):
    """Group all attribute nodes into one.

    Attribute list is normalized by uppercasing the value and sorting
    the list by attribute name. Concatenated attributes are appended
    to the instance (predicate) target value so OpenNMT will interpret
    them as word features.

    Note that OpenNMT expects all tokens to have the same number of word
    features, but only predicate tokens have attributes, so an extra step
    will be required to make sure all tokens have a feature. (See _layout
    in PenmanToLinearCodec)

    """
    for variable in g.variables():
        old_attributes = [
            attr for attr in g.attributes(source=variable) if attr.relation != 'instance'
        ]
        new_targets = []
        for old_attr in old_attributes:
            old_relation = old_attr.relation
            old_target = old_attr.target.upper() if isinstance(old_attr.target, str) else old_attr.target
            # don't store span info (only needed for anonymization) or untensed (doesn't provide much info)
            if old_relation != 'lnk' and (old_relation, old_target) != ('tense', 'UNTENSED'):
                new_targets.append('{}={}'.format(old_relation, old_target))
            g._triples.remove(old_attr)
        if new_targets:
            attr_features = '|'.join(sorted(new_targets))  # sort by attribute name
            instance = g.attributes(source=variable, relation='instance')[0]
            new_instance = Triple(
                source=instance.source,
                relation=instance.relation,
                target=instance.target + '│' + attr_features  # N.B. '│' not '|'
            )
            g._triples.insert(g._triples.index(instance), new_instance)
            g._triples.remove(instance)
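
To make the concatenation concrete, here is the feature string for one made-up predicate (illustrative values; note the target/feature separator is a Unicode vertical bar, not ASCII '|'):

new_targets = ['tense=PAST', 'sf=PROP']
attr_features = '|'.join(sorted(new_targets))   # 'sf=PROP|tense=PAST'
new_target = '_give_v_1' + '│' + attr_features  # '│' is not ASCII '|'
print(new_target)  # _give_v_1│sf=PROP|tense=PAST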
Example #7
        "top_k": 10,
        "show_url": False,
        "fast": args.fast,  # set this to be true if speed is a concern
        "output_path": models_path + "logs/",  # logging directory
        "faiss_index": None,  #"flat",
        "index_path": models_path + "faiss_flat_index.pkl",
    }

    args_blink = argparse.Namespace(**config)
    models = main_dense.load_models(args_blink, logger=logger)
    _, _, _, _, _, predictions, scores = main_dense.run(
        args_blink, logger, *models, test_data=for_blink, device=args.device
    )

    for s, pp in zip(for_blink, predictions):
        # Drop Wikipedia "List of ..." articles from the candidate list.
        pp = [p for p in pp if not p.startswith('List of')]
        # Best remaining prediction as a quoted :wiki value, or '-' if none.
        p = f'"{pp[0]}"' if pp else '-'
        p = p.replace(' ', '_')
        graph_n = s['graph_n']
        triple_n = s['triple_n']
        # Rebuild the affected graph with the predicted value swapped in.
        triples = list(graphs[graph_n].triples)
        n, rel, w = triples[triple_n]
        triples[triple_n] = Triple(n, rel, p)
        g = Graph(triples)
        g.metadata = graphs[graph_n].metadata
        graphs[graph_n] = g

    write_predictions(args.out, AMRBartTokenizer, graphs)
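
A quick illustration of the prediction-to-:wiki formatting in the loop above, with a made-up candidate list:

pp = ['List of rivers', 'Mississippi River']
pp = [p for p in pp if not p.startswith('List of')]
p = f'"{pp[0]}"' if pp else '-'
p = p.replace(' ', '_')
print(p)  # "Mississippi_River"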