def output_trial_data(lexicon, wordpath, outpath, irregulars, ag_items):
    """Augment trial data with information about each word."""

    word_fields = ['word', 'analysis', 'root', 'n.suffixes', 'suffix', 'n.syll', 'n.phon',
                   'OLD', 'PLD', 'sbtlx.freq', 'hal.freq', 'celex.freq', 'kf.freq',
                   'sbtlx.basefreq', 'hal.basefreq', 'celex.basefreq', 'kf.basefreq',
                   'sbtlx.suffixfreq', 'hal.suffixfreq', 'celex.suffixfreq', 'kf.suffixfreq',
                   'sbtlx.pformbase', 'hal.pformbase', 'celex.pformbase', 'kf.pformbase',
                   'sbtlx.wordrank', 'sbtlx.baserank', 'sbtlx.freqgreaterroot',
                   'ag.kf.clusterfreq', 'length',
                   'proper', 'bare', 'bimorph', 'inflectional', 'derivational',
                   'irreg', 'in.ag', 'ag.exp', 'inflect.family', 'exclude']

    # Open input
    try:
        infile = open(wordpath, 'rU')
        reader = csv.DictReader(infile)
    except IOError:
        print >> sys.stderr, "Couldn't open input file at", outpath
        sys.exit(1)

    # Open output
    try:
        outfile = open(outpath, 'wb')
        writer = csv.DictWriter(outfile, word_fields)
    except IOError:
        print >> sys.stderr, "Couldn't open output file at", outpath
        sys.exit(1)

    # Write out header and data
    writer.writeheader()
    nrows = 0
    nexclusions = 0
    for row in reader:
        try:
            word = lexicon[row['Word']]
        except KeyError:
            nexclusions += 1
            continue

        # Skip words that didn't appear in the original source
        if word.fake:
            continue

        proper = word.text[0].isupper()
        nsuffixes = len(word.suffixes) if word.suffixes else '0'
        suffix = word.suffixes[-1] if word.suffixes else "null"
        bare = True if not word.suffixes and not word.prefixes else False
        bimorph = True if (word.suffixes and not word.prefixes and
                           len(word.suffixes) == 1) else False
        irreg = word.text in irregulars and not irregulars[word.text].compound
        in_ag = word.text in ag_items
        ag_kf_clusterfreq = ag_items[word.text].kf_cluster_freq if word.text in ag_items else None
        ag_exp = ag_items[word.text].experiment if word.text in ag_items else None
        # Whether it is in an inflectional family of size > 1
        inflect_family = ((word.root in lexicon.inflect_sets) and
                          (word.text in lexicon.inflect_sets[word.root]) and
                          (len(lexicon.inflect_sets[word.root]) > 1))
        # Whether the frequency is greater than the root, which can be T/F or NA
        sbtlx_freq_greater_root = lexicon.freq_greater_root_sbtlx(word)
        # Whether we just don't like this item
        exclude = exclude_item(word.text)

        # Create the row
        row = dict((
            ('word', word.text),
            ('analysis', word.analysis),
            ('root', na_none(word.root)),
            ('n.suffixes', nsuffixes),
            ('suffix', na_none(suffix)),
            ('n.syll', na_null(row['NSyll'])),
            ('n.phon', na_null(row['NPhon'])),
            ('length', word.length),
            ('OLD', na_null(row['OLD'])),
            ('PLD', na_null(row['PLD'])),
            ('suffix', na_none(suffix)),
            ('sbtlx.freq', word.freq_sbtlx),
            ('hal.freq', word.freq_hal),
            ('celex.freq', word.freq_celex),
            ('kf.freq', word.freq_kf),
            ('sbtlx.basefreq', na_none(lexicon.base_freq_sbtlx(word.root))),
            ('hal.basefreq', na_none(lexicon.base_freq_hal(word.root))),
            ('celex.basefreq', na_none(lexicon.base_freq_celex(word.root))),
            ('kf.basefreq', na_none(lexicon.base_freq_kf(word.root))),
            ('sbtlx.suffixfreq', na_none(lexicon.suffix_freq_sbtlx(suffix))),
            ('hal.suffixfreq', na_none(lexicon.suffix_freq_hal(suffix))),
            ('celex.suffixfreq', na_none(lexicon.suffix_freq_celex(suffix))),
            ('kf.suffixfreq', na_none(lexicon.suffix_freq_kf(suffix))),
            ('sbtlx.pformbase', na_none(lexicon.p_form_base_sbtlx(word))),
            ('hal.pformbase', na_none(lexicon.p_form_base_hal(word))),
            ('celex.pformbase', na_none(lexicon.p_form_base_celex(word))),
            ('kf.pformbase', na_none(lexicon.p_form_base_kf(word))),
            ('sbtlx.wordrank', na_none(lexicon.word_rank(word))),
            ('sbtlx.baserank', na_none(lexicon.base_rank(word.root))),
            ('sbtlx.freqgreaterroot', convert_r_bool(sbtlx_freq_greater_root)),
            ('ag.kf.clusterfreq', na_none(ag_kf_clusterfreq)),
            ('proper', convert_r_bool(proper)),
            ('bare', convert_r_bool(bare)),
            ('bimorph', convert_r_bool(bimorph)),
            ('inflectional', convert_r_bool(word.inflectional)),
            ('derivational', convert_r_bool(word.derivational)),
            ('irreg', convert_r_bool(irreg)),
            ('in.ag', convert_r_bool(in_ag)),
            ('ag.exp', na_none(ag_exp)),
            ('inflect.family', convert_r_bool(inflect_family)),
            ('exclude', convert_r_bool(exclude)),
        ))
        writer.writerow(row)
        nrows += 1

    outfile.close()

    print "Excluded %d items." % nexclusions
    print "Wrote %d rows to %s." % (nrows, outpath)
def main():
    """Output data on irregular forms."""
    # NOTE: the docstring above doubles as the --help description below,
    # so its text is part of the program's CLI output.
    parser = argparse.ArgumentParser(description=main.__doc__)
    parser.add_argument('irregulardata', help='CSV file containing irregular verb information')
    parser.add_argument('subtlexpath', help='SUBTLEX frequency norms')
    parser.add_argument('outputpath', help='output file')
    args = parser.parse_args()
    subtlexpath = args.subtlexpath
    irregpath = args.irregulardata
    outpath = args.outputpath

    print "Loading SUBTLEX frequency data..."
    subtlex = SubtlexDict(subtlexpath)
    print "Loaded frequency information for %d words from %s." % (len(subtlex), repr(subtlexpath))

    print "Loading irregulars..."
    irregulars = parse_irregulars(irregpath)
    print "Read %d irregular verbs from %s." % (len(irregulars), repr(irregpath))


    # Get canonical frequencies: prefer the word as-is, fall back to the
    # capitalized form, and default to 0 when SUBTLEX has neither.
    freqs = {}
    for word in irregulars:
        try:
            freqs[word] = subtlex[word].freq_count_low
        except KeyError:
            try:
                freqs[word] = subtlex[word.capitalize()].freq_count_low
            except KeyError:
                freqs[word] = 0

    # Compute irregular counts and dense 1-based ranks, most frequent first.
    irregular_rule_counts = count_irregular_rules(irregulars, True, subtlex)
    irregular_rule_ranks = {rule: (idx + 1) for idx, rule in
                            enumerate(sorted(irregular_rule_counts, key=irregular_rule_counts.get,
                                             reverse=True))}
    # Ranks words (not rules) by canonical frequency; the loop variable name
    # 'rule' is a misnomer here — it iterates the words in irregulars.
    irregular_freq_ranks = {rule: (idx + 1) for idx, rule in
                            enumerate(sorted(irregulars, key=freqs.get,
                                             reverse=True))}
    # Compute total ranking by class order: sort key is the tuple
    # (rule-class count, word frequency), so class size dominates and
    # frequency breaks ties within a class.
    irregular_class_freqs = {word.word: (irregular_rule_counts[word.class_key], freqs[word.word])
                             for word in irregulars.itervalues()}
    # NOTE(review): this print looks like leftover debug output — confirm
    # whether it is intentionally part of the program's console output.
    print sorted(irregular_class_freqs.items())
    irregular_classfreq_ranks = {word: (idx + 1) for idx, word in
                                enumerate(sorted(irregulars, key=irregular_class_freqs.get,
                                                 reverse=True))}

    # Open output
    fields = ['word', 'pastsuffix', 'pastexclude', 'pastrulecount', 'pastrulerank',
              'stemchange', 'sbtlx.freq', 'irregfreqrank', 'irregclassfreqrank']
    try:
        outfile = open(outpath, 'wb')
        writer = csv.DictWriter(outfile, fields)
    except IOError:
        print >> sys.stderr, "Couldn't open output file at", outpath
        sys.exit(1)

    # Output data: one row per irregular word with its past-tense rule
    # statistics and the three rankings computed above.
    writer.writeheader()
    nrows = 0
    nexclusions = 0
    for word in irregulars:
        pastsuffix = irregulars[word].suffix
        stem_change = irregulars[word].stem_change_nodevoice
        past_exclude = irregulars[word].exclude
        past_rule_count = irregular_rule_counts[irregulars[word].class_key]
        past_rule_rank = irregular_rule_ranks[irregulars[word].class_key]

        row = dict((
            ('word', word),
            ('pastsuffix', na_none(pastsuffix)),
            ('pastexclude', convert_r_bool(past_exclude)),
            ('pastrulecount', na_none(past_rule_count)),
            ('pastrulerank', na_none(past_rule_rank)),
            ('stemchange', na_none(stem_change)),
            ('sbtlx.freq', freqs[word]),
            ('irregfreqrank', irregular_freq_ranks[word]),
            ('irregclassfreqrank', irregular_classfreq_ranks[word]),
        ))
        writer.writerow(row)

        # Count rows and exclusions; excluded words are still written out,
        # only tallied for the summary message below.
        nrows += 1
        if past_exclude:
            nexclusions += 1

    # Clean up
    outfile.close()

    print "Marked %d items as excluded." % nexclusions
    print "Wrote %d rows to %s." % (nrows, outpath)