def parse_directory(path, **kwargs):
    """Run the parser over every eligible file in the directory ``path``.

    Files whose names contain ``FrontMatter`` or ``PgD`` are skipped. Each
    ``.htm`` file is first rewritten as a ``.txt`` file (``<title>`` element
    and html/head/body/pre wrapper tags stripped) and the ``.htm`` original
    is deleted. Every ``.txt`` file is then handed to ``CRParser`` and
    ``do_parse``.

    Keyword arguments:
        logdir -- required; passed to ``initialize_logfile``.
        interactive -- optional; when true, ask (y/n/q) before each file.
        outdir -- required; returned once the directory has been walked.
        Any other keyword is forwarded unchanged to ``CRParser``.

    Returns ``kwargs['outdir']``.
    """
    logfile = initialize_logfile(kwargs['logdir'])

    # These two options are for this driver only and must not reach CRParser.
    # They are removed once, up front: deleting 'interactive' inside the loop
    # (as the code used to) silently disabled the prompt after the first file.
    interactive = kwargs.pop('interactive', False)
    kwargs.pop('logdir', None)

    # Order matters: variants with a trailing newline are removed before the
    # bare tag so that the blank line they would leave behind goes too.
    extras = (
        '<html>\n', '<html>', '</html>',
        '<head>\n', '</head>\n', '<head>', '</head>',
        '<body><pre>\n', '<pre>', '</pre>',
        '<body>', '</body>',
    )

    for filename in os.listdir(path):
        print(filename)
        print(path)
        # we don't process the daily digest or front matter.
        if 'FrontMatter' in filename or 'PgD' in filename:
            continue

        # Makes text versions for the parser
        if filename.endswith('.htm'):
            old_file = os.path.join(path, filename)
            with open(old_file, 'r') as html_doc:
                content = html_doc.read()
            # eliminates extra title and leaves expected space at the top
            content = re.sub(r'<title>.+?</title>', '', content)
            for tag in extras:
                content = content.replace(tag, '')
            filename = filename[:-3] + 'txt'
            with open(os.path.join(path, filename), 'w') as text_doc:
                text_doc.write(content)
            os.remove(old_file)

        if not filename.endswith('.txt'):
            continue

        if interactive:
            resp = raw_input("process file %s? (y/n/q) " % filename)
            if resp == 'n':
                print('skipping\n')
                continue
            elif resp == 'q':
                sys.exit()

        parser = CRParser(os.path.join(path, filename), **kwargs)
        do_parse(parser, logfile)

        # NOTE(review): this cleanup was tab-indented in the original, which
        # Python 2 treats as loop level, so it runs after every parsed file.
        # If mods.xml is not recreated between files the second removal will
        # raise OSError -- confirm whether it was meant to follow the loop.
        old_mods = os.path.join(path, "mods.xml")
        os.remove(old_mods)
        time.sleep(15)
    return kwargs['outdir']
def parse_single(infile, **kwargs):
    """Parse the single file ``infile`` and return its output path.

    Keyword arguments:
        logdir -- required; passed to ``initialize_logfile``.
        outdir -- required; directory component of the returned path.
        interactive -- accepted for symmetry with ``parse_directory`` but
            ignored here.
        Any other keyword is forwarded unchanged to ``CRParser``.

    Returns ``outdir`` joined with the base name of ``infile`` in which
    every ``.txt`` has been replaced by ``.xml`` (presumably the file the
    parser writes -- verify against CRParser).
    """
    logfile = initialize_logfile(kwargs['logdir'])

    # Driver-only options; strip them so CRParser never sees them. pop() with
    # a default replaces the former bare ``except: pass`` around ``del``.
    kwargs.pop('interactive', None)
    kwargs.pop('logdir', None)

    parser = CRParser(infile, **kwargs)
    do_parse(parser, logfile)

    xml_name = os.path.split(infile)[1].replace('.txt', '.xml')
    return os.path.join(kwargs['outdir'], xml_name)
def parse_single(infile, **kwargs):
    """Run the parser on one input file.

    ``kwargs`` must contain ``logdir`` (used to set up the log file) and
    ``outdir`` (used to build the return value). ``interactive`` and
    ``logdir`` are dropped before the remaining keywords are passed on to
    ``CRParser``.

    Returns the path ``<outdir>/<basename of infile>`` with ``.txt``
    replaced by ``.xml`` in the base name.
    """
    logfile = initialize_logfile(kwargs['logdir'])

    # Remove the options that only this wrapper understands. Using pop()
    # with a default avoids the bare ``except:`` the code relied on before,
    # which would also have hidden unrelated errors.
    for driver_only in ('interactive', 'logdir'):
        kwargs.pop(driver_only, None)

    do_parse(CRParser(infile, **kwargs), logfile)

    base_name = os.path.split(infile)[1]
    return os.path.join(kwargs['outdir'], base_name.replace('.txt', '.xml'))
def parse_directory(path, **kwargs):
    """Parse every eligible file found directly inside ``path``.

    For each directory entry:
      * names containing ``FrontMatter`` or ``PgD`` are skipped;
      * ``.htm`` files are converted in place to ``.txt`` (see
        ``_htm_to_txt``) and the ``.htm`` original is removed;
      * anything that is not a ``.txt`` file at that point is skipped;
      * the ``.txt`` file is passed to ``CRParser`` and ``do_parse``.

    Keyword arguments:
        logdir -- required; passed to ``initialize_logfile``.
        interactive -- optional; when true, ask (y/n/q) before each file.
            Answering ``q`` exits the process via ``sys.exit()``.
        outdir -- required; returned after the loop finishes.
        Any other keyword is forwarded unchanged to ``CRParser``.

    Returns ``kwargs['outdir']``.
    """
    logfile = initialize_logfile(kwargs['logdir'])

    # Pull the driver-only options out once, before iterating. The previous
    # code deleted 'interactive' from kwargs inside the loop, so the prompt
    # was only ever shown for the first file.
    interactive = kwargs.pop('interactive', False)
    kwargs.pop('logdir', None)

    for entry in os.listdir(path):
        print(entry)
        print(path)
        # we don't process the daily digest or front matter.
        if entry.find('FrontMatter') != -1 or entry.find('PgD') != -1:
            continue
        # Makes text versions for the parser
        if entry.endswith('.htm'):
            entry = _htm_to_txt(path, entry)

        if not entry.endswith('.txt'):
            continue

        if interactive:
            resp = raw_input("process file %s? (y/n/q) " % entry)
            if resp == 'n':
                print('skipping\n')
                continue
            elif resp == 'q':
                sys.exit()

        abspath = os.path.join(path, entry)
        parser = CRParser(abspath, **kwargs)
        do_parse(parser, logfile)

        # NOTE(review): runs once per parsed file. os.remove raises OSError
        # when mods.xml is absent, so this relies on the file existing again
        # before each iteration -- confirm, or move it after the loop.
        old_mods = os.path.join(path, "mods.xml")
        os.remove(old_mods)
        time.sleep(15)
    return kwargs['outdir']


def _htm_to_txt(directory, htm_name):
    """Rewrite ``directory/htm_name`` as a ``.txt`` file; return the new name.

    The ``<title>`` element and the html/head/body/pre wrapper tags are
    stripped from the content, the result is written next to the original
    with the last three characters of the name replaced by ``txt``, and the
    ``.htm`` file is deleted.
    """
    # need to eliminate particular blank lines, should still get the tags out
    # if expected line breaks aren't there -- hence newline variants first.
    extras = (
        '<html>\n',
        '<html>',
        '</html>',
        '<head>\n',
        '</head>\n',
        '<head>',
        '</head>',
        '<body><pre>\n',
        '<pre>',
        '</pre>',
        '<body>',
        '</body>',
    )
    old_file = os.path.join(directory, htm_name)
    with open(old_file, 'r') as source:
        content = source.read()
    # eliminates extra title and leaves expected space at the top
    content = re.sub(r'<title>.+?</title>', '', content)
    for tag in extras:
        content = content.replace(tag, '')

    new_name = htm_name[:-3] + 'txt'
    # ``with`` guarantees the text is flushed and the handle closed before
    # the parser opens the file.
    with open(os.path.join(directory, new_name), 'w') as target:
        target.write(content)
    os.remove(old_file)
    return new_name