outputdir = sys.argv[2]
        threads = int(sys.argv[3])
    except:
        print >>sys.stderr,"Syntax: sonar_postproc.py inputdir outputdir threads"
        sys.exit(2)
    
    
    cat_freqlist_word = FrequencyList()
    cat_freqlist_lemma = FrequencyList()
    cat_freqlist_lemmapos = FrequencyList()

    maxtasksperchild = 10
    preindex = True
    prevcategory = None
    print >>sys.stderr,"Initialising (indexing)..."
    processor = folia.CorpusProcessor(inputdir, process, threads, 'folia.xml',"",lambda x: True, maxtasksperchild,preindex)
    print >>sys.stderr,"Processing..."
    for i, data in enumerate(processor):
        filepath, freqlist_word, freqlist_lemma, freqlist_lemmapos = data
        if filepath:
            category = None
            for e in filepath.split('/'):
                if e[-4:] != '.xml' and e[:3] == 'WR-' or e[:3] == 'WS-':
                    category = e
            if not category:
                print >>sys.stderr, "No category found for: " + filepath
                sys.exit(2)
               
            if category != prevcategory:
                if prevcategory:
                    print >>sys.stderr,"Saving frequency lists for ", prevcategory
                print >> sys.stderr, "Unable to write file " + outputfile

        except Exception as e:
            print >> sys.stderr, "ERROR: Got exception curating " + filepath + ": ", repr(
                e)


if __name__ == '__main__':
    try:
        inputdir = sys.argv[1]
        outputdir = sys.argv[2]
        threads = int(sys.argv[3])
    except:
        print >> sys.stderr, "Syntax: sonar_postproc.py inputdir outputdir threads"
        sys.exit(2)

    maxtasksperchild = 10
    preindex = True
    prevcategory = None
    print >> sys.stderr, "Initialising (indexing)..."
    processor = folia.CorpusProcessor(
        inputdir, process, threads, 'folia.xml', "",
        lambda x: not os.path.exists(x.replace(inputdir, outputdir)),
        maxtasksperchild, preindex)
    l = len(processor.index)
    print >> sys.stderr, "Indexed " + str(l) + " files for curation"
    print >> sys.stderr, "Processing..."
    for i, _ in enumerate(processor.run()):
        progress = round((i + 1) / float(l) * 100, 1)
        print "#" + str(i) + " - " + str(progress) + '%'
    #Let XSLT do the basic conversion to HTML
    xslt = lxml.etree.parse(xsltfile)
    dcoitofoliatransformer = lxml.etree.XSLT(xslt)

    schema = lxml.etree.RelaxNG(folia.relaxng())

    if foliadir[-1] != '/': foliadir += '/'
    try:
        os.mkdir(foliadir[:-1])
    except:
        pass

    maxtasksperchild = 10
    preindex = True
    processor = folia.CorpusProcessor(sonardir, process, threads, 'pos', "",
                                      lambda x: True, maxtasksperchild,
                                      preindex)
    for i, _ in enumerate(processor):
        progress = round((i + 1) / float(len(processor.index)) * 100, 1)
        print "#" + str(i) + " - " + str(progress) + '%'

    #print "Building index..."
    #index = list(enumerate([ x for x in sonar.CorpusFiles(sonardir,'pos', "", lambda x: True, True) if not outputexists(x, sonardir, foliadir) ]))
    #indexlength = len(index)
    #print str(indexlength) + " documents found in " + sonardir

    #print "Processing..."
    #p = Pool(threads)
    #p.map(process, index )

    print "All done."