def main():
    """Command-line entry point: convert a UTF-8 text file to a communication.

    Parses the input path, output path, annotation level and log level from
    the command line, configures logging, reads the whole input as UTF-8,
    passes the text to ``create_comm`` and hands the result to
    ``write_communication_to_file``.  A path of ``-`` selects stdin (for the
    input) or stdout (for the output).
    """
    set_stdout_encoding()

    parser = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.set_defaults(annotation_level=AL_NONE)
    parser.add_argument(
        'text_path',
        type=str,
        help='Input text file path (- for stdin)',
    )
    parser.add_argument(
        'concrete_path',
        type=str,
        help='Output concrete file path (- for stdout)',
    )
    add_annotation_level_argparse_argument(parser)
    parser.add_argument(
        '-l', '--loglevel', '--log-level',
        default='info',
        help='Logging verbosity level threshold (to stderr)',
    )
    concrete.version.add_argparse_argument(parser)
    options = parser.parse_args()

    logging.basicConfig(
        level=options.loglevel.upper(),
        format='%(asctime)-15s %(levelname)s: %(message)s',
    )

    # "-" is mapped onto the /dev/fd pseudo-files, which won't work on
    # Windows.
    if options.text_path == '-':
        source_path = '/dev/fd/0'
    else:
        source_path = options.text_path

    if options.concrete_path == '-':
        target_path = '/dev/fd/1'
    else:
        target_path = options.concrete_path

    with codecs.open(source_path, encoding='utf-8') as text_file:
        text = text_file.read()
        comm = create_comm(source_path, text,
                           annotation_level=options.annotation_level)
        write_communication_to_file(comm, target_path)
def main():
    """Command-line entry point: convert a UTF-8 text file to a communication.

    Reads the positional ``text_path`` and ``concrete_path`` arguments (plus
    the annotation-level and version options added by the helper calls),
    loads the input text as UTF-8, passes it to ``create_comm`` and writes
    the result with ``write_communication_to_file``.  ``-`` stands for stdin
    (input) or stdout (output).
    """
    parser = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.set_defaults(annotation_level=AL_NONE)
    parser.add_argument('text_path',
                        help='Input text file path (- for stdin)',
                        type=str)
    parser.add_argument('concrete_path',
                        help='Output concrete file path (- for stdout)',
                        type=str)
    add_annotation_level_argparse_argument(parser)
    concrete.version.add_argparse_argument(parser)
    parsed = parser.parse_args()

    # Won't work on Windows... but that use case is very unlikely
    # A "-" argument is looked up in a one-entry table; any other value falls
    # through unchanged.
    input_path = {'-': '/dev/fd/0'}.get(parsed.text_path, parsed.text_path)
    output_path = {'-': '/dev/fd/1'}.get(parsed.concrete_path,
                                         parsed.concrete_path)
    level = parsed.annotation_level

    with codecs.open(input_path, encoding='utf-8') as handle:
        contents = handle.read()
        communication = create_comm(input_path, contents,
                                    annotation_level=level)
        write_communication_to_file(communication, output_path)
def main():
    """Command-line entry point: convert a UTF-8 text file to a communication.

    Takes two positional arguments, the input text path and the output
    concrete path, where ``-`` means stdin or stdout respectively.  The input
    is read in full as UTF-8, turned into a communication by ``create_comm``
    and written out by ``write_communication_to_file``.
    """
    def _substitute_dash(path, fd_path):
        # Swap the conventional "-" placeholder for the given /dev/fd path.
        if path == '-':
            return fd_path
        return path

    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='Convert text file to communication',
    )
    parser.set_defaults(annotation_level=AL_NONE)
    for arg_name, arg_help in (
        ('text_path', 'Input text file path (- for stdin)'),
        ('concrete_path', 'Output concrete file path (- for stdout)'),
    ):
        parser.add_argument(arg_name, type=str, help=arg_help)
    add_annotation_level_argparse_argument(parser)
    concrete.version.add_argparse_argument(parser)
    cli = parser.parse_args()

    # Won't work on Windows... but that use case is very unlikely
    src = _substitute_dash(cli.text_path, '/dev/fd/0')
    dst = _substitute_dash(cli.concrete_path, '/dev/fd/1')

    with codecs.open(src, encoding='utf-8') as infile:
        comm = create_comm(
            src,
            infile.read(),
            annotation_level=cli.annotation_level,
        )
        write_communication_to_file(comm, dst)
def main():
    """Command-line entry point: convert a tarball of text files to a
    tarball of concrete communications.

    Parses the input/output tarball paths and the ``--per-line``,
    ``--log-interval``, annotation-level and log-level options, configures
    logging, then writes every communication yielded by ``load`` to a
    ``CommunicationWriterTGZ`` under its own ``id``.  A path of ``-`` selects
    stdin (input) or stdout (output).

    Exits through ``parser.error`` (usage message on stderr, exit status 2)
    when ``--log-interval`` is not a positive integer.
    """
    set_stdout_encoding()

    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='Convert tarball of text files to'
                    ' tarball of concrete communications',
    )
    # NOTE(review): the `log_level` default below is not read anywhere in
    # this function -- the -l/--loglevel option stores into `loglevel`, whose
    # own default is 'info'.  Kept as-is in case something else consumes it;
    # confirm and remove.
    parser.set_defaults(annotation_level=AL_NONE, log_level='INFO',
                        log_interval=1000)
    parser.add_argument('text_tarball_path', type=str,
                        help='Input text tar file path (- for stdin)')
    parser.add_argument('concrete_tarball_path', type=str,
                        help='Output concrete tar file path (- for stdout)')
    parser.add_argument('--per-line', action='store_true',
                        help='Text files have one document per line (default:'
                             ' each text file is a document)')
    parser.add_argument('--log-interval', type=int,
                        help='Log an info message every log-interval docs')
    add_annotation_level_argparse_argument(parser)
    parser.add_argument('-l', '--loglevel', '--log-level',
                        help='Logging verbosity level threshold (to stderr)',
                        default='info')
    concrete.version.add_argparse_argument(parser)
    args = parser.parse_args()

    # Reject a non-positive interval before any output is opened: an interval
    # of 0 would otherwise raise ZeroDivisionError in the modulo test below,
    # part-way through writing the output tarball.
    if args.log_interval < 1:
        parser.error('--log-interval must be a positive integer')

    # Won't work on Windows
    text_tarball_path = (
        '/dev/fd/0'
        if args.text_tarball_path == '-'
        else args.text_tarball_path
    )
    concrete_tarball_path = (
        '/dev/fd/1'
        if args.concrete_tarball_path == '-'
        else args.concrete_tarball_path
    )
    per_line = args.per_line
    annotation_level = args.annotation_level

    logging.basicConfig(format='%(asctime)-15s %(levelname)s: %(message)s',
                        level=args.loglevel.upper())

    with CommunicationWriterTGZ(concrete_tarball_path) as writer:
        # Count documents from 1 so the progress message reports how many
        # documents have been reached so far.
        for doc_num, comm in enumerate(
                load(text_tarball_path, per_line, annotation_level), 1):
            if doc_num % args.log_interval == 0:
                # Lazy %-args: the message is only formatted if it is
                # actually emitted at the configured level.
                logging.info(u'writing doc %d (%s)...', doc_num, comm.id)
            writer.write(comm, comm.id)
def main():
    """Command-line entry point: convert a UTF-8 text file to a communication.

    After calling ``set_stdout_encoding`` and parsing the command line
    (input path, output path, annotation level, log level), this sets up
    logging, reads the input file in full as UTF-8, passes the text to
    ``create_comm`` and writes the result via
    ``write_communication_to_file``.  ``-`` may be given for either path to
    use stdin or stdout.
    """
    set_stdout_encoding()

    arg_parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='Convert text file to communication',
    )
    arg_parser.set_defaults(annotation_level=AL_NONE)
    arg_parser.add_argument('text_path', type=str,
                            help='Input text file path (- for stdin)')
    arg_parser.add_argument('concrete_path', type=str,
                            help='Output concrete file path (- for stdout)')
    add_annotation_level_argparse_argument(arg_parser)
    arg_parser.add_argument(
        '-l', '--loglevel', '--log-level',
        help='Logging verbosity level threshold (to stderr)',
        default='info',
    )
    concrete.version.add_argparse_argument(arg_parser)
    namespace = arg_parser.parse_args()

    threshold = namespace.loglevel.upper()
    logging.basicConfig(format='%(asctime)-15s %(levelname)s: %(message)s',
                        level=threshold)

    # Won't work on Windows
    reading_stdin = namespace.text_path == '-'
    writing_stdout = namespace.concrete_path == '-'
    in_path = '/dev/fd/0' if reading_stdin else namespace.text_path
    out_path = '/dev/fd/1' if writing_stdout else namespace.concrete_path

    with codecs.open(in_path, encoding='utf-8') as stream:
        document_text = stream.read()
        result = create_comm(in_path, document_text,
                             annotation_level=namespace.annotation_level)
        write_communication_to_file(result, out_path)