# Module-level requirements (defined elsewhere in the project):
#   from random import shuffle
#   from math import sqrt
#   import codecs
#   FeatureVector, PerfScore, getPRFScores, logPRFScores, logger
def learnParamsAdaGrad(self, decoder, corpus, param_file, loss_func,
                       num_passes=10, oracle_len='nolen'):
    """
    Learn parameters with AdaGrad updates, using perceptron, ramp,
    or hinge loss (selected via loss_func).
    """
    logger.debug('start learning parameters...')
    shuffle(corpus)  # shuffle corpus

    avg_weights = FeatureVector()
    curr_instances = 0
    node_perf = PerfScore(0.0, 0.0, 0.0)  # node performance
    edge_perf = PerfScore(0.0, 0.0, 0.0)  # edge performance

    eta = 1.0    # stepsize
    l2reg = 0.0  # L2 regularization strength (0.0 disables regularization)
    node_cost_scaling = 1.0  # cost scaling factor
    edge_cost_scaling = 1.0  # cost scaling factor
    sumSq = FeatureVector()  # per-feature sum of squared gradients (AdaGrad)

    for curr_num_passes in xrange(1, num_passes + 1):
        logger.debug('#curr_num_passes#: %d' % curr_num_passes)

        for instance in corpus:
            curr_instances += 1
            logger.debug('processing instance %d...' % curr_instances)

            # perceptron loss: plain (non-AdaGrad) update toward the oracle
            if loss_func.startswith('perceptron'):
                gradient, selected_nodes, selected_edges, score_pred = \
                    decoder.decode(instance, oracle_len)
                plus_feats, oracle_nodes, oracle_edges, score_true = \
                    decoder.oracle(instance)
                curr_loss = score_pred - score_true  # @UnusedVariable
                gradient -= plus_feats
                decoder.weights -= eta * gradient

            # ramp loss + cost-augmented decoding
            if loss_func.startswith('ramp'):
                node_cost, edge_cost = node_cost_scaling, edge_cost_scaling
                gradient, _, _, score_plus_cost = decoder.decode(
                    instance, oracle_len, node_cost, edge_cost)
                plus_feats, selected_nodes, selected_edges, score_minus_cost = \
                    decoder.decode(instance, oracle_len,
                                   -1.0 * node_cost, -1.0 * edge_cost)
                _, oracle_nodes, oracle_edges, score_true = decoder.oracle(instance)
                curr_loss = score_plus_cost - score_minus_cost  # @UnusedVariable
                gradient -= plus_feats

                # AdaGrad update: scale each coordinate by the root of its
                # accumulated squared gradient
                for k, v in gradient.iteritems():
                    if v == 0.0:
                        continue
                    sumSq[k] = sumSq.get(k, 0.0) + v * v
                    decoder.weights[k] = decoder.weights.get(k, 0.0) \
                        - eta * v / sqrt(sumSq[k])

                # L2 regularization (inactive while l2reg == 0.0)
                if l2reg != 0.0:
                    for k, v in decoder.weights.iteritems():
                        if v == 0.0:
                            continue
                        value = l2reg * v
                        sumSq[k] = sumSq.get(k, 0.0) + value * value
                        decoder.weights[k] = v - eta * value / sqrt(sumSq[k])

            # hinge loss + cost-augmented decoding
            if loss_func.startswith('hinge'):
                node_cost, edge_cost = node_cost_scaling, edge_cost_scaling
                gradient, selected_nodes, selected_edges, score_plus_cost = \
                    decoder.decode(instance, oracle_len, node_cost, edge_cost)
                plus_feats, oracle_nodes, oracle_edges, score_true = \
                    decoder.oracle(instance)
                curr_loss = score_plus_cost - score_true  # @UnusedVariable
                gradient -= plus_feats

                # AdaGrad update (same scheme as the ramp-loss branch)
                for k, v in gradient.iteritems():
                    if v == 0.0:
                        continue
                    sumSq[k] = sumSq.get(k, 0.0) + v * v
                    decoder.weights[k] = decoder.weights.get(k, 0.0) \
                        - eta * v / sqrt(sumSq[k])

                # L2 regularization (inactive while l2reg == 0.0)
                if l2reg != 0.0:
                    for k, v in decoder.weights.iteritems():
                        if v == 0.0:
                            continue
                        value = l2reg * v
                        sumSq[k] = sumSq.get(k, 0.0) + value * value
                        decoder.weights[k] = v - eta * value / sqrt(sumSq[k])

            # use gold nodes and edges to calculate P/R/F
            num_gold_nodes, num_gold_edges = instance.gold

            # P/R/F scores of nodes and edges for the current instance.
            # Edge recall cannot reach 100% since decoding produces only
            # tree structures.
            intersect_nodes = set(selected_nodes) & set(oracle_nodes)
            curr_node_perf = getPRFScores(len(intersect_nodes),
                                          len(selected_nodes), num_gold_nodes)
            logPRFScores('train_node', curr_node_perf)

            intersect_edges = set(selected_edges) & set(oracle_edges)
            curr_edge_perf = getPRFScores(len(intersect_edges),
                                          len(selected_edges), num_gold_edges)
            logPRFScores('train_edge', curr_edge_perf)

            # P/R/F scores of nodes and edges, averaged across all curr_instances
            node_perf = PerfScore(*[sum(x) for x in zip(node_perf, curr_node_perf)])
            edge_perf = PerfScore(*[sum(x) for x in zip(edge_perf, curr_edge_perf)])
            logPRFScores('train_node_avg',
                         PerfScore(node_perf.prec / curr_instances,
                                   node_perf.rec / curr_instances,
                                   node_perf.fscore / curr_instances))
            logPRFScores('train_edge_avg',
                         PerfScore(edge_perf.prec / curr_instances,
                                   edge_perf.rec / curr_instances,
                                   edge_perf.fscore / curr_instances))

        # accumulate weight vectors for averaging across passes
        avg_weights += decoder.weights

        # output the averaged weight vector to file
        # (1.0 avoids Python 2 integer division, which would zero the weights)
        curr_weights = FeatureVector()
        curr_weights += avg_weights * (1.0 / curr_num_passes)
        if param_file:
            with codecs.open(param_file, 'w', 'utf-8') as outfile:
                outfile.write('#curr_num_passes#: %d\n' % curr_num_passes)
                outfile.write('%s\n' % curr_weights.toString())

    final_weights = FeatureVector()
    final_weights += avg_weights * (1.0 / num_passes)
    return final_weights
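# --- Illustration (not from the original module) ---------------------------
# A minimal, self-contained sketch of the per-coordinate AdaGrad update used
# in learnParamsAdaGrad above, shown on plain dicts. The names adagrad_step,
# weights, sum_sq, and grad are hypothetical; eta defaults to 1.0 to match
# the stepsize above.
from math import sqrt

def adagrad_step(weights, sum_sq, grad, eta=1.0):
    """One sparse AdaGrad step: w[k] -= eta * g[k] / sqrt(sum of squared g[k])."""
    for k, g in grad.items():
        if g == 0.0:
            continue
        sum_sq[k] = sum_sq.get(k, 0.0) + g * g
        weights[k] = weights.get(k, 0.0) - eta * g / sqrt(sum_sq[k])

# Repeated gradients on the same feature shrink its effective step size:
w, ss = {}, {}
adagrad_step(w, ss, {'feat': 2.0})  # step 2/sqrt(4) = 1.0   -> w['feat'] = -1.0
adagrad_step(w, ss, {'feat': 2.0})  # step 2/sqrt(8) ~= 0.71 -> w['feat'] ~= -1.71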
import os
import codecs

# buildCorpus, FeatureExtractor, FeatureVector, and logger are assumed to be
# imported from elsewhere in the project.

if __name__ == '__main__':
    input_dir = '/Users/user/Data/SemanticSumm/Proxy/gold/split/dev/'
    body_file = 'aligned-amr-release-1.0-dev-proxy-body.txt'
    summ_file = 'aligned-amr-release-1.0-dev-proxy-summary.txt'

    corpus = buildCorpus(os.path.join(input_dir, body_file),
                         os.path.join(input_dir, summ_file))
    feat_extr = FeatureExtractor()
    feat_vec = FeatureVector()

    for inst in corpus:
        curr_filename = inst.filename
        my_nodes, s_nodes = inst.nodes
        my_edges, s_edges = inst.edges

        # logger.debug('extracting features for file: %s' % curr_filename)
        # for k_edge, v_edge in my_edges.iteritems():
        #     for tag in [0, 1]:
        #         feat_vec += feat_extr.getEdgeFeats(k_edge, v_edge, tag,
        #                                            curr_filename, my_nodes, my_edges)

        logger.debug('extracting features for file: %s' % curr_filename)
        # extract node features under both tags (0 and 1)
        for k_node, v_node in my_nodes.iteritems():
            for tag in [0, 1]:
                feat_vec += feat_extr.getNodeFeats(k_node, v_node, tag,
                                                   curr_filename, my_nodes, my_edges)

    with codecs.open('output_file', 'w', 'utf-8') as outfile:
        outfile.write('%s\n' % feat_vec.toString())
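# --- Sketch (an assumption, not the project's actual class) ----------------
# The code above relies on FeatureVector behaving like a sparse map from
# feature names to floats, supporting dict-style access (get, [k]), in-place
# vector arithmetic (+=, -=), scalar multiplication (needed for
# eta * gradient and avg_weights * (1.0 / num_passes)), iteritems, and
# toString. A minimal Python 2 stand-in with that interface could look like:
class FeatureVector(dict):
    """Sparse feature vector; missing keys behave as 0.0."""

    def __iadd__(self, other):
        for k, v in other.iteritems():
            self[k] = self.get(k, 0.0) + v
        return self

    def __isub__(self, other):
        for k, v in other.iteritems():
            self[k] = self.get(k, 0.0) - v
        return self

    def __mul__(self, scalar):
        result = FeatureVector()
        for k, v in self.iteritems():
            result[k] = v * scalar
        return result

    __rmul__ = __mul__  # supports float * FeatureVector, e.g. eta * gradient

    def toString(self):
        return '\n'.join('%s %s' % (k, v) for k, v in sorted(self.iteritems()))

# Example usage of the sketch (feature names are hypothetical):
#   fv = FeatureVector()
#   fv += {'node_freq': 1.0}
#   fv -= {'node_freq': 0.5, 'edge_label': 2.0}
#   print fv.toString()   # edge_label -2.0 / node_freq 0.5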