# Read the whole file named by args.dict_file into memory as a list of
# lines, decoded from Latin-1.
with codecs.open(args.dict_file, 'rb', 'latin1') as f:
        lines = f.readlines()

    count = 0

    spelling_vars = {}
    for line in lines:
        count += 1
        entry = line.split(';')
        # lexicon service needs lower case input
        term = entry[0].lower()
        term = term.replace('"', '')
        while True:
            try:
                sleep(1)
                words = get_spelling_variants(term, [], 1600, 1830)
                words = list(set(words))
                break
            except:
                print 'Retry!'
                sleep(5)
                pass

        if len(words) > 0:
            spelling_vars[term] = words

        if count % 1000 == 0:
            print count

        print term, words
    liwc_category_output = []
    spelling_vars = {}
    liwc_output = {}
    for line in lines:
        # legend
        if line[0].isdigit() or line.startswith(('%', '\r')):
            liwc_category_output.append(line.strip())
        # word
        else:
            entry = line.split()
            # lexicon service needs lower case input
            term = entry[0].lower()
            categories = entry[1:]
            sleep(0.3)
            words = get_spelling_variants(term, categories, 1600, 1830)
            words.append(term)
            words = list(set(words))

            spelling_vars[term] = words

            print term, words
            for word in words:
                if liwc_output.get(
                        word) and not categories == liwc_output[word]:
                    new_c = list(set(categories + liwc_output.get(word)))
                    new_c.sort()
                    liwc_output[word] = new_c
                else:
                    liwc_output[word] = categories
    #with codecs.open('liwc_output.json', 'w', 'utf8') as f: