def main():
    """
    Command-line entry point for the C53 XML-to-text converter.

    Builds a ``CliConfig`` selecting ``*.xml`` inputs, parses the command
    line with the parser returned by ``iodir_argparser``, and hands the
    configuration, the ``convert`` callable and the parsed arguments to
    ``generic_main``.
    """
    config = CliConfig(
        description='C53 xml to text',
        input_description='XML files (via antiword)',
        glob='*.xml',
    )
    options = iodir_argparser(config).parse_args()
    generic_main(config, convert, options)
Exemple #2
0
def main():
    """
    Command-line entry point for the Fine Rolls XML-to-text converter.

    Builds a ``CliConfig`` selecting ``roll*.xml`` inputs, parses the
    command line with the parser returned by ``iodir_argparser``, and
    hands the configuration, the ``convert`` callable and the parsed
    arguments to ``generic_main``.
    """
    config = CliConfig(
        description='Fine Rolls xml to text',
        input_description='XML files (manually annotated TEI)',
        glob='roll*.xml',
    )
    options = iodir_argparser(config).parse_args()
    generic_main(config, convert, options)
def main():
    """
    Run the Fine Rolls XML-to-text conversion from the command line.

    The configuration targets ``roll*.xml`` files; argument parsing is
    done by the parser that ``iodir_argparser`` builds from it, and the
    actual work is delegated to ``generic_main`` with ``convert``.
    """
    settings = CliConfig(description='Fine Rolls xml to text',
                         input_description='XML files (manually annotated TEI)',
                         glob='roll*.xml')
    parser = iodir_argparser(settings)
    generic_main(settings, convert, parser.parse_args())
def main():
    """
    Command-line entry point for the TTT petitions converter.

    Builds a ``CliConfig`` selecting ``*.dat`` inputs, parses the command
    line with the parser returned by ``iodir_argparser``, and hands the
    configuration, the ``convert`` callable and the parsed arguments to
    ``generic_main``.
    """
    config = CliConfig(
        description='TTT petitions converter',
        input_description='.dat files',
        glob='*.dat',
    )
    options = iodir_argparser(config).parse_args()
    generic_main(config, convert, options)
Exemple #5
0
def main():
    """
    Run the C53 XML-to-text conversion from the command line.

    The configuration targets ``*.xml`` files; argument parsing is done
    by the parser that ``iodir_argparser`` builds from it, and the actual
    work is delegated to ``generic_main`` with ``convert``.
    """
    settings = CliConfig(description='C53 xml to text',
                         input_description='XML files (via antiword)',
                         glob='*.xml')
    parser = iodir_argparser(settings)
    generic_main(settings, convert, parser.parse_args())
def main():
    """
    Command-line entry point for the state papers XML-to-text converter.

    Builds a ``CliConfig`` selecting ``*.xml`` inputs, parses the command
    line with the parser returned by ``iodir_argparser``, and hands the
    configuration, the ``_do_file`` callable and the parsed arguments to
    ``generic_main``.
    """
    config = CliConfig(
        description='state papers to text',
        input_description='XML files',
        glob='*.xml',
    )
    options = iodir_argparser(config).parse_args()
    generic_main(config, _do_file, options)
def main():
    """
    Run the state papers XML-to-text conversion from the command line.

    The configuration targets ``*.xml`` files; argument parsing is done
    by the parser that ``iodir_argparser`` builds from it, and the actual
    work is delegated to ``generic_main`` with ``_do_file``.
    """
    settings = CliConfig(description='state papers to text',
                         input_description='XML files',
                         glob='*.xml')
    parser = iodir_argparser(settings)
    generic_main(settings, _do_file, parser.parse_args())
def main():
    """
    Run the TTT petitions conversion from the command line.

    The configuration targets ``*.dat`` files; argument parsing is done
    by the parser that ``iodir_argparser`` builds from it, and the actual
    work is delegated to ``generic_main`` with ``convert``.
    """
    settings = CliConfig(description='TTT petitions converter',
                         input_description='.dat files',
                         glob='*.dat')
    parser = iodir_argparser(settings)
    generic_main(settings, convert, parser.parse_args())
def main():
    """
    Command-line entry point for the annotations converter.

    Extends the parser returned by ``iodir_argparser`` with a ``--format``
    option (``human`` or ``gate``, defaulting to ``human``), then passes
    the converter that ``mk_converter`` builds for the chosen format to
    ``generic_main`` along with the configuration and parsed arguments.
    """
    config = CliConfig(
        description='annotations converter',
        input_description='annotated text files',
        glob='*',
    )
    parser = iodir_argparser(config)
    parser.add_argument(
        '--format',
        choices=['human', 'gate'],
        default='human',
        help='input markup format',
    )
    options = parser.parse_args()
    converter = mk_converter(options.format)
    generic_main(config, converter, options)
def main():
    """
    Command-line entry point for the crude annotations viewer.

    Walks the input directory tree, recreates each visited directory at
    the same relative position under the output directory, and calls
    ``save_occurrences`` for every file encountered.
    """
    config = CliConfig(
        description='crude annotations viewer',
        input_description='annotation json',
        glob='*',
    )
    options = iodir_argparser(config).parse_args()
    input_dir = options.input
    output_dir = options.output
    for src_dir, _, basenames in os.walk(input_dir):
        # mirror the walked directory's relative location in the output tree
        dest_dir = fp.join(output_dir, fp.relpath(src_dir, input_dir))
        if not fp.exists(dest_dir):
            os.makedirs(dest_dir)
        for basename in basenames:
            save_occurrences(src_dir, dest_dir, basename)
def main():
    """
    Command-line entry point for the TTT petitions metadata extractor.

    Reads each file in the input directory that matches the configured
    glob (``*.dat``), decoding it as ISO 8859-1, and collects the distinct
    values that ``_TAG_RE`` finds for each key.  One UTF-8 file per key is
    then written to the output directory, holding that key's values
    sorted one per line.

    The output directory is created first if it does not already exist.
    """
    # local import: only needed here, to create the output directory
    import os

    cfg = CliConfig(description='TTT petitions metadata extractor',
                    input_description='.dat files',
                    glob='*.dat')
    psr = iodir_argparser(cfg)
    args = psr.parse_args()

    values = defaultdict(set)
    for filename in glob.glob(fp.join(args.input, cfg.glob)):
        with codecs.open(filename, 'r', 'iso8859-1') as stream:
            content = stream.read()
        # the file is already closed here; matching works on the text alone
        for key, val in _TAG_RE.findall(content):
            values[key].add(val)

    # opening a file for writing fails if its parent directory is missing
    if not fp.exists(args.output):
        os.makedirs(args.output)
    for key, val in values.items():
        ofile = fp.join(args.output, key)
        with codecs.open(ofile, 'w', 'utf-8') as stream:
            print("\n".join(sorted(val)), file=stream)
Exemple #12
0
def main():
    """
    Command-line entry point that dumps the text found under selected tags.

    Every file in the input directory matching the configured glob
    (``roll*.xml``) is parsed with ``ET.parse``.  For each tag in
    ``_TAGS``, the text of every matching node is whitespace-normalised
    and collected.  The distinct texts for each tag are then written,
    sorted one per line, to a UTF-8 file named after the tag in the output
    directory, which is created if it does not exist.
    """
    config = CliConfig(
        description='Fine Rolls xml to text',
        input_description='XML files (manually annotated TEI)',
        glob='roll*.xml',
    )
    options = iodir_argparser(config).parse_args()

    texts_by_tag = collections.defaultdict(set)
    pattern = fp.join(options.input, config.glob)
    for xml_path in glob.glob(pattern):
        document = ET.parse(xml_path)
        for tag in _TAGS:
            for element in document.iter(tag):
                raw = element.text if element.text else ''
                # collapse any run of whitespace into a single space
                texts_by_tag[tag].add(' '.join(raw.split()))

    out_dir = options.output
    if not fp.exists(out_dir):
        os.makedirs(out_dir)
    for tag, texts in texts_by_tag.items():
        with codecs.open(fp.join(out_dir, tag), 'w', 'utf-8') as out:
            print("\n".join(sorted(texts)), file=out)
def main():
    """
    Collect the distinct text values of the tags in ``_TAGS`` and save them.

    Input files are those in the input directory matching the configured
    glob (``roll*.xml``); each is parsed with ``ET.parse``.  Node text is
    normalised by collapsing whitespace runs to single spaces (missing
    text counts as the empty string).  For each tag that occurred, a UTF-8
    file named after the tag is written in the output directory with the
    sorted values, one per line.  The output directory is created when
    absent.
    """
    settings = CliConfig(description='Fine Rolls xml to text',
                         input_description='XML files (manually annotated TEI)',
                         glob='roll*.xml')
    parser = iodir_argparser(settings)
    cli = parser.parse_args()

    seen = collections.defaultdict(set)
    input_files = glob.glob(fp.join(cli.input, settings.glob))
    for input_file in input_files:
        parsed = ET.parse(input_file)
        for tag_name in _TAGS:
            for elem in parsed.iter(tag_name):
                words = (elem.text or '').split()
                seen[tag_name].add(' '.join(words))

    if not fp.exists(cli.output):
        os.makedirs(cli.output)
    for tag_name, found in seen.items():
        destination = fp.join(cli.output, tag_name)
        listing = "\n".join(sorted(found))
        with codecs.open(destination, 'w', 'utf-8') as stream:
            print(listing, file=stream)