if __name__ == '__main__':
    # Script entry point: parse the command line, load the LIWC dictionary,
    # prepare an empty result table and start iterating over the FoLiA XML
    # files found in the input directory.
    parser = argparse.ArgumentParser()
    parser.add_argument('dir_name', help='the name of the directory '
                        'containing the FoLiA XML files processed')
    parser.add_argument('dic', help='the liwc dictionary to be used')
    parser.add_argument('out_file', help='csv file to store the results')
    args = parser.parse_args()

    # The dictionary file named LIWC_Dutch_dictionary.dic is read as latin1;
    # every other dictionary file is read as utf8.
    if args.dic.endswith('LIWC_Dutch_dictionary.dic'):
        encoding = 'latin1'
    else:
        encoding = 'utf8'

    liwc_dict, liwc_categories = load_liwc(args.dic, encoding)

    # Namespace-qualified element tags in the FoLiA XML namespace.
    act_tag = '{http://ilk.uvt.nl/folia}div'
    event_tag = '{http://ilk.uvt.nl/folia}event'
    sentence_tag = '{http://ilk.uvt.nl/folia}s'
    word_tag = '{http://ilk.uvt.nl/folia}w'
    text_content_tag = '{http://ilk.uvt.nl/folia}t'

    # Empty result table: one column per value of liwc_categories
    # (presumably the category names -- confirm against load_liwc) plus a
    # '#words' column.
    # list(...) is required because dict.values() is a view on Python 3 and
    # cannot be concatenated to a list; on Python 2 it is a harmless copy.
    result = pd.DataFrame(columns=list(liwc_categories.values()) + ['#words'])

    xml_files = glob.glob('{}/*.xml'.format(args.dir_name))

    for i, f in enumerate(xml_files):
        # Progress line: "<file> (<index> of <total>)". The single-argument
        # call form prints the same text on Python 2 and Python 3.
        print('{} ({} of {})'.format(f, i + 1, len(xml_files)))
        # Fresh per-file accumulators.
        num_words = 0
        liwc_count = Counter()
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('file_in', help='the name of the FoLiA XML file add '
                        'LIWC entities to')
    parser.add_argument('dir_out', help='the name of the directory to save '
                        'the output file to')
    parser.add_argument('dic', help='json file containing liwc dictionary '
                        '(e.g., <embem_data_dir>/dict/historic_'
                        'Dutch_LIWC.dic)')
    args = parser.parse_args()

    file_name = args.file_in
    dir_out = args.dir_out

    liwc_dict, liwc_categories = load_liwc(args.dic, 'utf8')

    # Load document
    context = etree.iterparse(file_name,
                              events=('end',),
                              remove_blank_text=True)
    annotations_tag = '{http://ilk.uvt.nl/folia}annotations'
    sentence_tag = '{http://ilk.uvt.nl/folia}s'
    word_tag = '{http://ilk.uvt.nl/folia}w'
    text_content_tag = '{http://ilk.uvt.nl/folia}t'

    for event, elem in context:
        if elem.tag == annotations_tag:
            # add entity-annotation for liwc
            annotation_attrs = {
                'annotator': 'liwc',