def populate_taxonomy(**kwargs):
    """
    Dump the thesaurus taxonomy to 'taxonomy.json' as JSON lines.

    Every class yielded by tdb.taxonomy() becomes one JSON object on
    its own line. The object's keys come from FIELDS['thesaurusclass'],
    paired positionally with the class's id, label, wordclass, level,
    branch_size, sortcode and parent_id (in that order).

    Keyword arguments:
        out_dir -- directory in which 'taxonomy.json' is written; any
            existing file of that name is overwritten.
    """
    target_path = os.path.join(kwargs.get('out_dir'), 'taxonomy.json')
    with open(target_path, 'w') as sink:
        for node in tdb.taxonomy():
            values = (
                node.id,
                node.label,
                node.wordclass,
                node.level,
                node.branch_size,
                node.sortcode,
                node.parent_id,
            )
            # zip() stops at the shorter sequence, so the field list
            # determines which of the values above are serialized.
            record = dict(zip(FIELDS['thesaurusclass'], values))
            sink.write(json.dumps(record) + '\n')
def options_list(**kwargs):
    """
    Return the list of thesaurus class IDs that will be used as
    the set of options for the classifier to pick.

    A class is selected when all of the following hold:
      - its level is 2, 3, 4 or 5;
      - it has no wordclass (wordclass is None);
      - its branch size does not exceed 'max_size';
      - it is at level 2, or its branch size is at least 'min_size'
        (level-2 classes are exempt from the minimum).

    Keyword arguments:
        min_size -- minimum branch size (default 2500)
        max_size -- maximum branch size (default 50000)
    """
    # Classifiers will only be built for thesaurus branches between these sizes
    smallest = kwargs.get('min_size', 2500)
    largest = kwargs.get('max_size', 50000)

    selected = []
    # NOTE(review): level=5 presumably caps the depth of the traversal;
    # confirm against tdb.taxonomy().
    for node in tdb.taxonomy(level=5):
        if node.level not in (2, 3, 4, 5):
            continue
        if node.wordclass is not None:
            continue
        if not (node.branch_size <= largest):
            continue
        if node.level == 2 or node.branch_size >= smallest:
            selected.append(node.id)
    return selected