Ejemplo n.º 1
0
 def __init__(self):
     """Set up the collaborators this object keeps as attributes.

     Stores a ``GurobiILP`` instance as ``gilp``, a ``FeatureVector``
     as ``weights`` and a ``FeatureExtractor`` as ``feat_extr``.
     """
     self.gilp = GurobiILP()
     self.weights = FeatureVector()
     self.feat_extr = FeatureExtractor()
Ejemplo n.º 2
0
 def __init__(self):
     """Build the three helper objects held by the instance.

     ``gilp`` is a fresh ``GurobiILP``, ``weights`` a fresh
     ``FeatureVector`` and ``feat_extr`` a fresh ``FeatureExtractor``.
     """
     self.gilp = GurobiILP()
     self.weights = FeatureVector()
     self.feat_extr = FeatureExtractor()
Ejemplo n.º 3
0
class Decoder(object):
    """
    Decoder for structured prediction.

    Scores every node and edge of an instance with the current weight
    vector, passes the scores to the ILP solver held in ``self.gilp`` and
    returns the feature vector of the decoded (or of the oracle) graph.
    """
    def __init__(self):
        """Create the ILP solver, the weight vector and the feature extractor."""
        self.gilp = GurobiILP()
        self.weights = FeatureVector()
        self.feat_extr = FeatureExtractor()

    def _score_items(self, items, oracle_items, get_feats, cost,
                     curr_filename, my_nodes, my_edges):
        """
        Score every item of ``items`` (nodes or edges) under both tags.

        items: mapping from key tuple to node/edge object
        oracle_items: keys that belong to the oracle graph
        get_feats: feature function (``getNodeFeats`` or ``getEdgeFeats``)
        cost: cost added to labelings that disagree with the oracle,
              or None to disable cost-augmented decoding

        Returns ``(weights, num_positive)``. ``weights`` maps
        ``key + (tag,)`` to the score; ``num_positive`` counts the items
        whose tag-1 score is strictly greater than 0.0.
        """
        weights = {}
        num_positive = 0
        # .items() instead of .iteritems(): works on Python 2 and Python 3
        for key, value in items.items():
            for tag in (0, 1):
                feats = get_feats(key, value, tag, curr_filename,
                                  my_nodes, my_edges)
                score = self.weights.dot(feats)
                # cost-augmented decoding: false positives (tag 1, not in
                # oracle) and false negatives (tag 0, in oracle) receive
                # the cost; agreeing labelings receive 0
                if cost is not None:
                    disagrees = (tag == 1) != (key in oracle_items)
                    score += cost if disagrees else 0
                weights[key + (tag, )] = score
                if tag == 1 and score > 0.0:
                    num_positive += 1
        return weights, num_positive

    def _graph_features(self, curr_filename, my_nodes, my_edges,
                        chosen_nodes, chosen_edges):
        """
        Sum the features of all edges, then all nodes, tagging each item
        with 1 if its key is in the chosen set and 0 otherwise.
        """
        feat_vec = FeatureVector()

        for k_edge, v_edge in my_edges.items():
            tag = 1 if k_edge in chosen_edges else 0
            feat_vec += self.feat_extr.getEdgeFeats(k_edge, v_edge, tag,
                                                    curr_filename, my_nodes,
                                                    my_edges)

        for k_node, v_node in my_nodes.items():
            tag = 1 if k_node in chosen_nodes else 0
            feat_vec += self.feat_extr.getNodeFeats(k_node, v_node, tag,
                                                    curr_filename, my_nodes,
                                                    my_edges)

        return feat_vec

    def decode(self,
               instance,
               oracle_len='nolen',
               node_cost=None,
               edge_cost=None):
        """
        Decode a summary graph for ``instance`` with the ILP solver.

        an instance includes:
        filename: passed through to the feature extractor
        nodes: (my_nodes, oracle_nodes, root_nodes)
        edges: (my_edges, oracle_edges)
        gold: (num_gold_nodes, num_gold_edges)
        my_nodes: (1,) -> AmrNode1, (2,) -> AmrNode2, ...
        my_edges: (1,2) -> AmrEdge1, (2,1) -> AmrEdge2,...
        oracle_nodes: (1,), (3,),... nodes contained in summary graph
        oracle_edges: (1,2), (3,1),... edges contained in summary graph

        oracle_len: 'nodes' passes the gold node count to the solver,
                    'edges' passes the gold edge count; any other value
                    passes 0 for both (presumably "no length constraint"
                    -- confirm against GurobiILP.decode)
        node_cost, edge_cost: when not None, enable cost-augmented
                    decoding for nodes / edges with that cost

        Returns ``(feat_vec, selected_nodes, selected_edges, score_pred)``
        where ``feat_vec`` holds the features of the decoded graph.
        """
        logger.debug('start feature extraction...')

        curr_filename = instance.filename
        my_nodes, oracle_nodes, root_nodes = instance.nodes  # nodes and selected nodes
        my_edges, oracle_edges = instance.edges  # edges and selected edges
        num_gold_nodes, num_gold_edges = instance.gold  # number of gold nodes and edges

        # get edge weights
        edge_weights, num_nonnegative_edges = self._score_items(
            my_edges, oracle_edges, self.feat_extr.getEdgeFeats, edge_cost,
            curr_filename, my_nodes, my_edges)
        logger.debug('[num_nonnegative_edges]: %d', num_nonnegative_edges)

        # get node weights
        node_weights, num_nonnegative_nodes = self._score_items(
            my_nodes, oracle_nodes, self.feat_extr.getNodeFeats, node_cost,
            curr_filename, my_nodes, my_edges)
        logger.debug('[num_nonnegative_nodes]: %d', num_nonnegative_nodes)

        # run Gurobi ILP decoder using node and edge weights
        # optionally set the decoded summary length (#nodes and #edges)
        logger.debug('start ILP decoding...')
        num_selected_nodes = num_gold_nodes if oracle_len == 'nodes' else 0
        num_selected_edges = num_gold_edges if oracle_len == 'edges' else 0
        selected_nodes, selected_edges, score_pred = self.gilp.decode(
            node_weights,
            edge_weights,
            root_nodes,
            num_selected_nodes=num_selected_nodes,
            num_selected_edges=num_selected_edges)
        logger.debug('[num_gold_nodes]: %d', num_gold_nodes)
        logger.debug('[num_selected_nodes]: %d', len(selected_nodes))
        logger.debug('[num_gold_edges]: %d', num_gold_edges)
        logger.debug('[num_selected_edges]: %d', len(selected_edges))

        # features that are associated with the decoded graph
        feat_vec = self._graph_features(curr_filename, my_nodes, my_edges,
                                        selected_nodes, selected_edges)

        # return features associated with decoded graph
        return feat_vec, selected_nodes, selected_edges, score_pred

    def oracle(self, instance):
        """
        Build the feature vector of the oracle graph and score it.

        an instance includes:
        my_nodes: (1,) -> AmrNode1, (2,) -> AmrNode2, ...
        my_edges: (1,2) -> AmrEdge1, (2,1) -> AmrEdge2,...
        root_nodes: (1,), (3,),... nodes that are root of sentence
        selected_nodes: (1,), (3,),... nodes contained in summary graph
        selected_edges: (1,2), (3,1),... edges contained in summary graph

        Returns ``(feat_vec, oracle_nodes, oracle_edges, score_true)``
        where ``score_true`` is ``self.weights.dot(feat_vec)``.
        """
        logger.debug('start oracle decoding...')

        curr_filename = instance.filename
        my_nodes, oracle_nodes, _ = instance.nodes  # nodes and selected nodes
        my_edges, oracle_edges = instance.edges  # edges and selected edges

        # features that are associated with oracle graph
        feat_vec = self._graph_features(curr_filename, my_nodes, my_edges,
                                        oracle_nodes, oracle_edges)

        score_true = self.weights.dot(feat_vec)

        # return features associated with oracle graph
        return feat_vec, oracle_nodes, oracle_edges, score_true
Ejemplo n.º 4
0
class Decoder(object):
    """
    Decoder for structured prediction.

    Computes node and edge scores from the current weights, runs the ILP
    solver stored in ``self.gilp`` on them, and reports the features of
    the decoded graph; ``oracle`` does the same for the oracle graph.
    """
    def __init__(self):
        """Create the ILP solver, the weight vector and the feature extractor."""
        self.gilp = GurobiILP()
        self.weights = FeatureVector()
        self.feat_extr = FeatureExtractor()

    def _score_items(self, items, oracle_items, get_feats, cost, curr_filename, my_nodes, my_edges):
        """
        Score each entry of ``items`` (nodes or edges) for tag 0 and tag 1.

        items: mapping from key tuple to node/edge object
        oracle_items: keys that are part of the oracle graph
        get_feats: ``getNodeFeats`` or ``getEdgeFeats`` of the extractor
        cost: cost for labelings that disagree with the oracle, or None
              to switch cost-augmented decoding off

        Returns ``(weights, num_positive)``: ``weights`` maps
        ``key + (tag,)`` to its score and ``num_positive`` is the number
        of items whose tag-1 score is strictly greater than 0.0.
        """
        weights = {}
        num_positive = 0
        # .items() rather than .iteritems() so the loop runs on Python 2 and 3
        for key, value in items.items():
            for tag in (0, 1):
                score = self.weights.dot(get_feats(key, value, tag, curr_filename, my_nodes, my_edges))
                # cost-augmented decoding: only false positives (tag 1, key not
                # in oracle) and false negatives (tag 0, key in oracle) are charged
                if cost is not None:
                    disagrees = (tag == 1) != (key in oracle_items)
                    score += cost if disagrees else 0
                weights[key + (tag,)] = score
                if tag == 1 and score > 0.0:
                    num_positive += 1
        return weights, num_positive

    def _graph_features(self, curr_filename, my_nodes, my_edges, chosen_nodes, chosen_edges):
        """
        Accumulate edge features and then node features, using tag 1 for
        keys found in the chosen sets and tag 0 for all others.
        """
        feat_vec = FeatureVector()

        for k_edge, v_edge in my_edges.items():
            tag = 1 if k_edge in chosen_edges else 0
            feat_vec += self.feat_extr.getEdgeFeats(k_edge, v_edge, tag, curr_filename, my_nodes, my_edges)

        for k_node, v_node in my_nodes.items():
            tag = 1 if k_node in chosen_nodes else 0
            feat_vec += self.feat_extr.getNodeFeats(k_node, v_node, tag, curr_filename, my_nodes, my_edges)

        return feat_vec

    def decode(self, instance, oracle_len='nolen', node_cost=None, edge_cost=None):
        """
        Decode a summary graph for ``instance`` with the ILP solver.

        an instance includes:
        filename: passed through to the feature extractor
        nodes: (my_nodes, oracle_nodes, root_nodes)
        edges: (my_edges, oracle_edges)
        gold: (num_gold_nodes, num_gold_edges)
        my_nodes: (1,) -> AmrNode1, (2,) -> AmrNode2, ...
        my_edges: (1,2) -> AmrEdge1, (2,1) -> AmrEdge2,...
        oracle_nodes: (1,), (3,),... nodes contained in summary graph
        oracle_edges: (1,2), (3,1),... edges contained in summary graph

        oracle_len: 'nodes' hands the gold node count to the solver,
                    'edges' hands it the gold edge count; otherwise both
                    counts are 0 (presumably "unconstrained" -- confirm
                    against GurobiILP.decode)
        node_cost, edge_cost: a non-None value turns on cost-augmented
                    decoding for nodes / edges with that cost

        Returns ``(feat_vec, selected_nodes, selected_edges, score_pred)``
        where ``feat_vec`` holds the features of the decoded graph.
        """
        logger.debug('start feature extraction...')

        curr_filename = instance.filename
        my_nodes, oracle_nodes, root_nodes = instance.nodes # nodes and selected nodes
        my_edges, oracle_edges = instance.edges # edges and selected edges
        num_gold_nodes, num_gold_edges = instance.gold # number of gold nodes and edges

        # get edge weights
        edge_weights, num_nonnegative_edges = self._score_items(
            my_edges, oracle_edges, self.feat_extr.getEdgeFeats, edge_cost, curr_filename, my_nodes, my_edges)
        logger.debug('[num_nonnegative_edges]: %d', num_nonnegative_edges)

        # get node weights
        node_weights, num_nonnegative_nodes = self._score_items(
            my_nodes, oracle_nodes, self.feat_extr.getNodeFeats, node_cost, curr_filename, my_nodes, my_edges)
        logger.debug('[num_nonnegative_nodes]: %d', num_nonnegative_nodes)

        # run Gurobi ILP decoder using node and edge weights
        # optionally set the decoded summary length (#nodes and #edges)
        logger.debug('start ILP decoding...')
        num_selected_nodes = num_gold_nodes if oracle_len == 'nodes' else 0
        num_selected_edges = num_gold_edges if oracle_len == 'edges' else 0
        selected_nodes, selected_edges, score_pred = self.gilp.decode(node_weights, edge_weights, root_nodes,
                                                                      num_selected_nodes=num_selected_nodes,
                                                                      num_selected_edges=num_selected_edges)
        logger.debug('[num_gold_nodes]: %d', num_gold_nodes)
        logger.debug('[num_selected_nodes]: %d', len(selected_nodes))
        logger.debug('[num_gold_edges]: %d', num_gold_edges)
        logger.debug('[num_selected_edges]: %d', len(selected_edges))

        # features that are associated with the decoded graph
        feat_vec = self._graph_features(curr_filename, my_nodes, my_edges, selected_nodes, selected_edges)

        # return features associated with decoded graph
        return feat_vec, selected_nodes, selected_edges, score_pred

    def oracle(self, instance):
        """
        Build the feature vector of the oracle graph and score it.

        an instance includes:
        my_nodes: (1,) -> AmrNode1, (2,) -> AmrNode2, ...
        my_edges: (1,2) -> AmrEdge1, (2,1) -> AmrEdge2,...
        root_nodes: (1,), (3,),... nodes that are root of sentence
        selected_nodes: (1,), (3,),... nodes contained in summary graph
        selected_edges: (1,2), (3,1),... edges contained in summary graph

        Returns ``(feat_vec, oracle_nodes, oracle_edges, score_true)``
        where ``score_true`` is ``self.weights.dot(feat_vec)``.
        """
        logger.debug('start oracle decoding...')

        curr_filename = instance.filename
        my_nodes, oracle_nodes, _ = instance.nodes # nodes and selected nodes
        my_edges, oracle_edges = instance.edges # edges and selected edges

        # features that are associated with oracle graph
        feat_vec = self._graph_features(curr_filename, my_nodes, my_edges, oracle_nodes, oracle_edges)

        score_true = self.weights.dot(feat_vec)

        # return features associated with oracle graph
        return feat_vec, oracle_nodes, oracle_edges, score_true