def add_heem_classification(soup, heem_tag, internalSystem='HEEM'):
    """Add an entry for *heem_tag* to the HeemClassification element.

    Looks under ``soup.LexicalResource.GlobalInformation.HeemClassification``
    for an element whose tag name is ``get_label_type(heem_tag)`` and whose
    ``id`` is the ``heem_labels_en`` entry for *heem_tag*. If one is found,
    nothing happens. Otherwise such an element is created, given a single
    ``Class`` child carrying ``value=heem_tag`` and
    ``internalSystem=internalSystem``, and appended to HeemClassification.

    The document is modified in place; nothing is returned.

    NOTE(review): ``soup`` is presumably a BeautifulSoup document (it must
    offer ``new_tag`` and tag-name attribute navigation) -- confirm against
    the callers.
    """
    tag_name = get_label_type(heem_tag)
    english_id = heem_labels_en.get(heem_tag)
    classification = soup.LexicalResource.GlobalInformation.HeemClassification

    # Guard: an entry with this tag name and id is already present.
    if classification.find(tag_name, id=english_id):
        return

    entry = soup.new_tag(tag_name, id=english_id)
    class_tag = soup.new_tag('Class', value=heem_tag,
                             internalSystem=internalSystem)
    entry.append(class_tag)
    classification.append(entry)
if __name__ == '__main__':
    # Usage: <script> input_dir output_dir
    #
    # Every *.txt file in input_dir is read with load_data(); each label of
    # every sample (labels are '_'-separated) is looked up in heem_labels_en,
    # and the text plus the looked-up labels are written, tab-separated, to a
    # file of the same name in output_dir.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input_dir',
        help='the directory where the input text files can be found.')
    parser.add_argument(
        'output_dir',
        help='the directory where the output files should be written.')
    args = parser.parse_args()

    input_dir, output_dir = args.input_dir, args.output_dir

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for file_name in os.listdir(input_dir):
        if not file_name.endswith('.txt'):
            continue

        source_path = os.path.join(input_dir, file_name)
        x_data, y_data = load_data(source_path)
        # Split every combined label before the output file is opened.
        label_groups = [combined.split('_') for combined in y_data]

        target_path = os.path.join(output_dir, file_name)
        with codecs.open(target_path, 'wb', 'utf-8') as out:
            for idx, group in enumerate(label_groups):
                # Labels missing from heem_labels_en become the string 'None'.
                looked_up = [heem_labels_en.get(lbl, 'None') for lbl in group]
                text = x_data[idx].decode('utf-8')
                out.write(u'{}\t{}\n'.format(text, '_'.join(looked_up)))