# Example #1
    def learnParamsAdaGrad(self,
                           decoder,
                           corpus,
                           param_file,
                           loss_func,
                           num_passes=10,
                           oracle_len='nolen'):
        """
        Learn feature weights with AdaGrad-style updates.

        The loss is selected by prefix of ``loss_func``:
          * ``perceptron`` -- structured perceptron (plain SGD step),
          * ``ramp``       -- ramp loss with cost-augmented decoding,
          * ``hinge``      -- hinge loss with cost-augmented decoding.

        :param decoder: object exposing ``decode()``, ``oracle()`` and a
            ``weights`` FeatureVector that is updated in place.
        :param corpus: list of training instances; shuffled in place once.
        :param param_file: if non-empty, pass-averaged weights are written
            to this path after every pass (file is overwritten each time).
        :param loss_func: loss-function name, see above.
        :param num_passes: number of passes over the corpus.
        :param oracle_len: length setting forwarded to ``decoder.decode()``.
        :return: FeatureVector of weights averaged over all passes.
        :raises ValueError: if ``loss_func`` matches no known loss.
        """
        if not loss_func.startswith(('perceptron', 'ramp', 'hinge')):
            # Fail fast: previously an unknown loss fell through to a
            # NameError on selected_nodes further down.
            raise ValueError('unknown loss function: %s' % loss_func)

        logger.debug('start learning parameters...')
        shuffle(corpus)  # shuffle corpus once, before the first pass

        avg_weights = FeatureVector()  # sum of weight vectors, one per pass
        curr_instances = 0

        node_perf = PerfScore(0.0, 0.0, 0.0)  # running sums of node P/R/F
        edge_perf = PerfScore(0.0, 0.0, 0.0)  # running sums of edge P/R/F

        eta = 1.0    # stepsize
        l2reg = 0.0  # L2 regularization strength (0.0 disables it)
        node_cost_scaling = 1.0  # cost scaling factor
        edge_cost_scaling = 1.0  # cost scaling factor

        sumSq = FeatureVector()  # per-feature sum of squared gradients

        def _adagrad_update(gradient):
            # One AdaGrad step: w[k] -= eta * g[k] / sqrt(sum_t g_t[k]^2),
            # followed by an optional L2 shrinkage pass (shared by the
            # ramp and hinge branches, which previously duplicated it).
            for k, v in gradient.iteritems():
                if v == 0.0: continue
                sumSq[k] = sumSq.get(k, 0.0) + v * v
                decoder.weights[k] = decoder.weights.get(
                    k, 0.0) - eta * v / sqrt(sumSq[k])

            if l2reg != 0.0:
                for k, v in decoder.weights.iteritems():
                    if v == 0.0: continue
                    value = l2reg * v
                    sumSq[k] = sumSq.get(k, 0.0) + value * value
                    decoder.weights[k] = v - eta * value / sqrt(sumSq[k])

        for curr_num_passes in xrange(1, num_passes + 1):
            logger.debug('#curr_num_passes#: %d' % curr_num_passes)

            for instance in corpus:
                curr_instances += 1
                logger.debug('processing instance %d...' % curr_instances)

                # perceptron loss: plain (non-AdaGrad) update, as before
                if loss_func.startswith('perceptron'):
                    gradient, selected_nodes, selected_edges, score_pred = decoder.decode(
                        instance, oracle_len)
                    plus_feats, oracle_nodes, oracle_edges, score_true = decoder.oracle(
                        instance)

                    curr_loss = score_pred - score_true  # @UnusedVariable
                    gradient -= plus_feats
                    decoder.weights -= eta * gradient

                # ramp loss + cost-augmented decoding
                if loss_func.startswith('ramp'):
                    node_cost, edge_cost = node_cost_scaling, edge_cost_scaling
                    gradient, _, _, score_plus_cost = decoder.decode(
                        instance, oracle_len, node_cost, edge_cost)
                    plus_feats, selected_nodes, selected_edges, score_minus_cost = decoder.decode(
                        instance, oracle_len, -1.0 * node_cost,
                        -1.0 * edge_cost)
                    _, oracle_nodes, oracle_edges, score_true = decoder.oracle(
                        instance)

                    curr_loss = score_plus_cost - score_minus_cost  # @UnusedVariable
                    gradient -= plus_feats
                    _adagrad_update(gradient)

                # hinge loss + cost-augmented decoding
                if loss_func.startswith('hinge'):
                    node_cost, edge_cost = node_cost_scaling, edge_cost_scaling
                    gradient, selected_nodes, selected_edges, score_plus_cost = decoder.decode(
                        instance, oracle_len, node_cost, edge_cost)
                    plus_feats, oracle_nodes, oracle_edges, score_true = decoder.oracle(
                        instance)

                    curr_loss = score_plus_cost - score_true  # @UnusedVariable
                    gradient -= plus_feats
                    _adagrad_update(gradient)

                # use gold nodes and edges to calculate P/R/F
                num_gold_nodes, num_gold_edges = instance.gold
                # P/R/F scores of nodes and edges, for current instance.
                # Edge recall cannot reach 100% since decoding produces
                # only tree structures.
                intersect_nodes = set(selected_nodes) & set(oracle_nodes)
                curr_node_perf = getPRFScores(len(intersect_nodes),
                                              len(selected_nodes),
                                              num_gold_nodes)
                logPRFScores('train_node', curr_node_perf)

                intersect_edges = set(selected_edges) & set(oracle_edges)
                curr_edge_perf = getPRFScores(len(intersect_edges),
                                              len(selected_edges),
                                              num_gold_edges)
                logPRFScores('train_edge', curr_edge_perf)

                # accumulate P/R/F sums, then log running averages over
                # all instances seen so far
                node_perf = PerfScore(
                    *[sum(x) for x in zip(node_perf, curr_node_perf)])
                edge_perf = PerfScore(
                    *[sum(x) for x in zip(edge_perf, curr_edge_perf)])

                logPRFScores(
                    'train_node_avg',
                    PerfScore(node_perf.prec / curr_instances,
                              node_perf.rec / curr_instances,
                              node_perf.fscore / curr_instances))
                logPRFScores(
                    'train_edge_avg',
                    PerfScore(edge_perf.prec / curr_instances,
                              edge_perf.rec / curr_instances,
                              edge_perf.fscore / curr_instances))

            # accumulate the weight vector after each pass
            avg_weights += decoder.weights

            # Output averaged weight vectors to file.
            # BUGFIX: the original multiplied by (1 / curr_num_passes),
            # which is integer division under Python 2 and evaluates to 0
            # for every pass after the first, zeroing the averaged weights.
            curr_weights = FeatureVector()
            curr_weights += avg_weights * (1.0 / curr_num_passes)
            if param_file:
                with codecs.open(param_file, 'w', 'utf-8') as outfile:
                    outfile.write('#curr_num_passes#: %d\n' % curr_num_passes)
                    outfile.write('%s\n' % curr_weights.toString())

        # final weights: average over all passes (true division, see above)
        final_weights = FeatureVector()
        final_weights += avg_weights * (1.0 / num_passes)
        return final_weights
# Example #2
        # NOTE(review): this is a fragment -- the enclosing per-instance
        # loop (presumably "for inst in corpus:") starts outside this
        # chunk; confirm against the full file.
        curr_filename = inst.filename
        # inst.nodes / inst.edges each unpack into a pair; only the first
        # element of each pair is used below.
        my_nodes, s_nodes = inst.nodes
        my_edges, s_edges = inst.edges
        
#         logger.debug('extracting features for file: %s' % curr_filename)
#         for k_edge, v_edge in my_edges.iteritems():
#             for tag in [0,1]:
#                 feat_vec += feat_extr.getEdgeFeats(k_edge, v_edge, tag, curr_filename, my_nodes, my_edges)
                
        # Accumulate node features for both tags (0 and 1) of every node.
        logger.debug('extracting features for file: %s' % curr_filename)
        for k_node, v_node in my_nodes.iteritems():
            for tag in [0,1]:
                feat_vec += feat_extr.getNodeFeats(k_node, v_node, tag, curr_filename, my_nodes, my_edges)
    
    # After the enclosing loop: dump the accumulated feature vector.
    with codecs.open('output_file', 'w', 'utf-8') as outfile:
        outfile.write('%s\n' % feat_vec.toString())
        
    
    
    










# Example #3
if __name__ == '__main__':

    input_dir = '/Users/user/Data/SemanticSumm/Proxy/gold/split/dev/'
    body_file = 'aligned-amr-release-1.0-dev-proxy-body.txt'
    summ_file = 'aligned-amr-release-1.0-dev-proxy-summary.txt'

    # Build the corpus from the aligned body and summary files.
    corpus = buildCorpus(os.path.join(input_dir, body_file),
                         os.path.join(input_dir, summ_file))
    feat_extr = FeatureExtractor()
    feat_vec = FeatureVector()

    for inst in corpus:
        curr_filename = inst.filename
        my_nodes, s_nodes = inst.nodes
        my_edges, s_edges = inst.edges

        # Edge-feature extraction is currently disabled:
        # for k_edge, v_edge in my_edges.iteritems():
        #     for tag in [0, 1]:
        #         feat_vec += feat_extr.getEdgeFeats(k_edge, v_edge, tag,
        #                                            curr_filename, my_nodes,
        #                                            my_edges)

        logger.debug('extracting features for file: %s' % curr_filename)
        # Accumulate node features for both tags of every node.
        for node_key, node_val in my_nodes.iteritems():
            for curr_tag in (0, 1):
                feat_vec += feat_extr.getNodeFeats(node_key, node_val,
                                                   curr_tag, curr_filename,
                                                   my_nodes, my_edges)

    # Dump the accumulated feature vector.
    with codecs.open('output_file', 'w', 'utf-8') as outfile:
        outfile.write('%s\n' % feat_vec.toString())
# Example #4
    def learnParamsAdaGrad(self, decoder, corpus, param_file, loss_func, num_passes=10, oracle_len='nolen'):
        """
        Learn feature weights with AdaGrad-style updates.

        The loss is selected by prefix of loss_func: 'perceptron'
        (structured perceptron, plain SGD step), 'ramp' (ramp loss with
        cost-augmented decoding), or 'hinge' (hinge loss with
        cost-augmented decoding).

        :param decoder: exposes decode(), oracle() and a weights
            FeatureVector that is updated in place.
        :param corpus: list of training instances; shuffled in place once.
        :param param_file: if non-empty, pass-averaged weights are written
            to this path after every pass (overwritten each time).
        :param loss_func: loss-function name, see above.
        :param num_passes: number of passes over the corpus.
        :param oracle_len: length setting forwarded to decoder.decode().
        :return: FeatureVector of weights averaged over all passes.
        """
        logger.debug('start learning parameters...')
        shuffle(corpus) # shuffle corpus once, before the first pass

        avg_weights = FeatureVector()  # sum of weight vectors, one per pass
        curr_instances = 0

        node_perf = PerfScore(0.0, 0.0, 0.0) # running sums of node P/R/F
        edge_perf = PerfScore(0.0, 0.0, 0.0) # running sums of edge P/R/F

        eta = 1.0 # stepsize
        l2reg = 0.0 # L2 regularization strength (0.0 disables it)
        node_cost_scaling = 1.0 # cost scaling factor
        edge_cost_scaling = 1.0 # cost scaling factor

        sumSq = FeatureVector()  # per-feature sum of squared gradients

        for curr_num_passes in xrange(1, num_passes+1):
            logger.debug('#curr_num_passes#: %d' % curr_num_passes)

            for instance in corpus:
                curr_instances += 1
                logger.debug('processing instance %d...' % curr_instances)

                # perceptron loss: plain (non-AdaGrad) update
                if loss_func.startswith('perceptron'):
                    gradient, selected_nodes, selected_edges, score_pred = decoder.decode(instance, oracle_len)
                    plus_feats, oracle_nodes, oracle_edges, score_true = decoder.oracle(instance)

                    curr_loss = score_pred - score_true  # @UnusedVariable
                    gradient -= plus_feats
                    decoder.weights -= eta * gradient

                # ramp loss + cost-augmented decoding
                if loss_func.startswith('ramp'):
                    node_cost, edge_cost = node_cost_scaling, edge_cost_scaling
                    gradient, _, _, score_plus_cost = decoder.decode(instance, oracle_len, node_cost, edge_cost)
                    plus_feats, selected_nodes, selected_edges, score_minus_cost = decoder.decode(instance, oracle_len, -1.0 * node_cost, -1.0 * edge_cost)
                    _, oracle_nodes, oracle_edges, score_true = decoder.oracle(instance)

                    curr_loss = score_plus_cost - score_minus_cost  # @UnusedVariable
                    gradient -= plus_feats

                    # AdaGrad step: w[k] -= eta * g[k] / sqrt(sum g[k]^2)
                    for k, v in gradient.iteritems():
                        if v == 0.0: continue
                        sumSq[k] = sumSq.get(k, 0.0) + v * v
                        decoder.weights[k] = decoder.weights.get(k, 0.0) - eta * v / sqrt(sumSq[k])

                    # optional L2 shrinkage, also with AdaGrad scaling
                    if l2reg != 0.0:
                        for k, v in decoder.weights.iteritems():
                            if v == 0.0: continue
                            value = l2reg * v
                            sumSq[k] = sumSq.get(k, 0.0) + value * value
                            decoder.weights[k] = v - eta * value / sqrt(sumSq[k])

                # hinge-loss + cost-augmented decoding
                if loss_func.startswith('hinge'):
                    node_cost, edge_cost = node_cost_scaling, edge_cost_scaling
                    gradient, selected_nodes, selected_edges, score_plus_cost = decoder.decode(instance, oracle_len, node_cost, edge_cost)
                    plus_feats, oracle_nodes, oracle_edges, score_true = decoder.oracle(instance)

                    curr_loss = score_plus_cost - score_true  # @UnusedVariable
                    gradient -= plus_feats

                    # AdaGrad step: w[k] -= eta * g[k] / sqrt(sum g[k]^2)
                    for k, v in gradient.iteritems():
                        if v == 0.0: continue
                        sumSq[k] = sumSq.get(k, 0.0) + v * v
                        decoder.weights[k] = decoder.weights.get(k, 0.0) - eta * v / sqrt(sumSq[k])

                    # optional L2 shrinkage, also with AdaGrad scaling
                    if l2reg != 0.0:
                        for k, v in decoder.weights.iteritems():
                            if v == 0.0: continue
                            value = l2reg * v
                            sumSq[k] = sumSq.get(k, 0.0) + value * value
                            decoder.weights[k] = v - eta * value / sqrt(sumSq[k])

                # use gold nodes and edges to calculate P/R/F
                num_gold_nodes, num_gold_edges = instance.gold
                # P/R/F scores of nodes and edges, for current instance.
                # Edge recall cannot reach 100% since decoding produces only tree structure.
                intersect_nodes = set(selected_nodes) & set(oracle_nodes)
                curr_node_perf = getPRFScores(len(intersect_nodes), len(selected_nodes), num_gold_nodes)
                logPRFScores('train_node', curr_node_perf)

                intersect_edges = set(selected_edges) & set(oracle_edges)
                curr_edge_perf = getPRFScores(len(intersect_edges), len(selected_edges), num_gold_edges)
                logPRFScores('train_edge', curr_edge_perf)

                # P/R/F sums of nodes and edges, averaged across all curr_instances
                node_perf = PerfScore(*[sum(x) for x in zip(node_perf, curr_node_perf)])
                edge_perf = PerfScore(*[sum(x) for x in zip(edge_perf, curr_edge_perf)])

                logPRFScores('train_node_avg',
                             PerfScore(node_perf.prec/curr_instances, node_perf.rec/curr_instances,
                                       node_perf.fscore/curr_instances))
                logPRFScores('train_edge_avg',
                             PerfScore(edge_perf.prec/curr_instances, edge_perf.rec/curr_instances,
                                       edge_perf.fscore/curr_instances))

            # accumulate the weight vector after each pass
            avg_weights += decoder.weights

            # Output averaged weight vectors to file.
            # BUGFIX: the original used (1/curr_num_passes), which is
            # integer division under Python 2 and evaluates to 0 for every
            # pass after the first, zeroing the averaged weights.
            curr_weights = FeatureVector()
            curr_weights += avg_weights * (1.0/curr_num_passes)
            if param_file:
                with codecs.open(param_file, 'w', 'utf-8') as outfile:
                    outfile.write('#curr_num_passes#: %d\n' % curr_num_passes)
                    outfile.write('%s\n' % curr_weights.toString())

        # final weights: average over all passes (true division, see above)
        final_weights = FeatureVector()
        final_weights += avg_weights * (1.0/num_passes)
        return final_weights