def update_counts(parse_list, emission_counts, transition_counts, tag_counts):
    """Accumulates HMM emission, transition, and tag counts from one parsed sentence."""
    terminals, tags = utils.get_terminals_tags(parse_list)

    # updating emission counts: key is tag~>word
    for word, tag in zip(terminals, tags):
        emission_key = tag + '~>' + word
        emission_counts[emission_key] = emission_counts.get(emission_key, 0) + 1

    # pad the tag sequence with the start and stop symbols
    tags.insert(0, '*')
    tags.append('STOP')

    # updating transition counts: key is previous_tag~>current_tag
    for i in xrange(1, len(tags)):
        transition_key = tags[i-1] + '~>' + tags[i]
        transition_counts[transition_key] = transition_counts.get(transition_key, 0) + 1

    # updating tag counts (including the boundary symbols)
    for tag in tags:
        tag_counts[tag] = tag_counts.get(tag, 0) + 1
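
# The counts collected above can be turned into maximum-likelihood HMM
# parameters by normalizing each count by the count of its conditioning tag.
# The sketch below is illustrative only: estimate_hmm_probs is a hypothetical
# helper (not part of this module) and assumes the same 'tag~>word' and
# 'prev~>next' key conventions used in update_counts.
def estimate_hmm_probs(emission_counts, transition_counts, tag_counts):
    emission_prob = {}
    transition_prob = {}
    # emission probability: p(word | tag) = count(tag~>word) / count(tag)
    for key, count in emission_counts.iteritems():
        tag = key.split('~>')[0]
        emission_prob[key] = float(count) / tag_counts[tag]
    # transition probability: p(tag | prev) = count(prev~>tag) / count(prev)
    for key, count in transition_counts.iteritems():
        prev_tag = key.split('~>')[0]
        transition_prob[key] = float(count) / tag_counts[prev_tag]
    return emission_prob, transition_prob
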
def run(sentence, tagset, preterms, start, hmm_prob, pcfg, pcfg_u):
    """Runs dual decomposition, alternating between the HMM tagger and the
    CKY parser until their tag sequences agree or max_iterations is reached."""
    max_iterations = 20

    n = len(sentence)

    u = defaultdict(float)  # dual decomposition parameters, default 0.0
    init_dd_param(u, n, tagset)

    k = 0  # number of iterations
    while k < max_iterations:

        # best tag sequence under the HMM, adjusted by the dual parameters
        tags = viterbi.run(sentence, tagset, hmm_prob, u)

        # best parse under the PCFG, adjusted by the dual parameters
        parse = fast_cky.run(sentence, preterms, start, pcfg, pcfg_u, u)
        parse_list = utils.make_parse_list(parse)
        terms, parse_tags = utils.get_terminals_tags(parse_list)

        print "iteration", k
        print tags, "tagger"
        print parse_tags, "parser"

        if agree(parse_tags, tags):
            return k, tags, parse  # converged in the kth iteration

        # subgradient step: penalize tags the two models disagree on
        y = compute_indicators(tags, tagset)
        z = compute_indicators(parse_tags, tagset)
        k += 1
        step_size = 1.0 / k
        update(y, z, u, step_size)

    return -1, tags, parse  # does not converge
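
# The helpers referenced above (init_dd_param, agree, compute_indicators,
# update) are defined elsewhere in this project. A minimal sketch of one
# plausible implementation is given below, assuming the dual parameters and
# indicator structures are keyed by (position, tag) pairs; the actual code
# and sign convention may differ.
def init_dd_param(u, n, tagset):
    # start every dual parameter u(i, tag) at zero
    for i in xrange(n):
        for tag in tagset:
            u[(i, tag)] = 0.0

def agree(parse_tags, tags):
    # the two models agree when they produce identical tag sequences
    return list(parse_tags) == list(tags)

def compute_indicators(tags, tagset):
    # indicator(i, tag) = 1 if position i is tagged with tag, else 0
    indicators = {}
    for i, chosen in enumerate(tags):
        for tag in tagset:
            indicators[(i, tag)] = 1.0 if tag == chosen else 0.0
    return indicators

def update(y, z, u, step_size):
    # subgradient update: move u to penalize positions where tagger (y)
    # and parser (z) disagree
    for key in y:
        u[key] -= step_size * (y[key] - z[key])
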
def get_sentences(dev_file):
    """Reads one parse per line from dev_file and returns the list of sentences."""
    sentences = []
    with open(dev_file, 'r') as dev:
        for line in dev:
            line = line.strip()
            parse = utils.make_parse_list(line)
            sentence, tags = utils.get_terminals_tags(parse)
            sentences.append(sentence)
    return sentences
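
# The utils helpers used throughout operate on a parse converted to a nested
# list, and get_terminals_tags returns the words and their preterminal tags in
# order. The sketch below is only illustrative: it assumes a representation
# like ['S', ['NP', ['DT', 'the'], ['NN', 'dog']], ...] where a preterminal is
# a two-element list [tag, word]; the real utils module may differ.
def get_terminals_tags_sketch(node):
    terminals = []
    tags = []
    if isinstance(node, list):
        if len(node) == 2 and isinstance(node[1], basestring):
            # preterminal node: [tag, word]
            tags.append(node[0])
            terminals.append(node[1])
        else:
            # internal node: [label, child, child, ...]
            for child in node[1:]:
                t, g = get_terminals_tags_sketch(child)
                terminals.extend(t)
                tags.extend(g)
    return terminals, tags
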
def extract_tagging_data(treebank, st_filename, wt_filename):
    """Extracts (sentence, tag sequence) pairs from a treebank and writes them out."""
    sentences = []
    tagseqs = []

    parses = utils.read_parses_no_indent(treebank)
    for parse in parses:
        parse_list = utils.make_parse_list(parse)
        sentence, truetags = utils.get_terminals_tags(parse_list)
        sentences.append(sentence)
        tagseqs.append(truetags)

    write_tagging_data(sentences, tagseqs, st_filename, wt_filename)
    return sentences, tagseqs
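
# write_tagging_data is defined elsewhere in this project. The sketch below
# shows one possible output format, assuming st_filename holds one
# space-separated sentence per line and wt_filename holds word/tag pairs with
# a blank line between sentences; the real format may differ.
def write_tagging_data(sentences, tagseqs, st_filename, wt_filename):
    st_file = open(st_filename, 'w')
    wt_file = open(wt_filename, 'w')
    for sentence, tagseq in zip(sentences, tagseqs):
        st_file.write(' '.join(sentence) + '\n')
        for word, tag in zip(sentence, tagseq):
            wt_file.write(word + ' ' + tag + '\n')
        wt_file.write('\n')  # blank line between sentences
    st_file.close()
    wt_file.close()
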
def quick_execute(dev):
    print "loading learnt parameters..."
    pcfg_prob, nonterms, start = cky.get_pcfg()
    hmm, tagset = viterbi.get_hmm_tagset()

    print "reading dev data..."
    parses = utils.read_parses_no_indent(dev)

    for parse in parses:
        if len(parse) > 100:  # only consider parses longer than 100 characters
            parse_list = utils.make_parse_list(parse)
            sentence, truetags = utils.get_terminals_tags(parse_list)
            print '\n', sentence, '\n'
            print "running dual decomposition..."
            num_iterations, dd_tags, dd_parse = dd_parser_tagger.run(sentence, pcfg_prob, nonterms, start, tagset, hmm)
            print "\n", truetags, " :true tags"
            if num_iterations != -1:
                print "converges in", num_iterations, "iterations\n"
            else:
                print "does not converge :(\n"