def main():
    """Command-line entry point: convert a UTF-8 text file to a communication.

    Parses the command line, configures the root logger at the requested
    level, reads the whole input file, builds a communication from it with
    ``create_comm`` and writes it out with ``write_communication_to_file``.
    A path of ``-`` selects stdin (for input) or stdout (for output).
    """
    set_stdout_encoding()

    cli = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    cli.set_defaults(annotation_level=AL_NONE)
    cli.add_argument(
        'text_path',
        type=str,
        help='Input text file path (- for stdin)',
    )
    cli.add_argument(
        'concrete_path',
        type=str,
        help='Output concrete file path (- for stdout)',
    )
    add_annotation_level_argparse_argument(cli)
    cli.add_argument(
        '-l', '--loglevel', '--log-level',
        default='info',
        help='Logging verbosity level threshold (to stderr)',
    )
    concrete.version.add_argparse_argument(cli)
    opts = cli.parse_args()

    log_format = '%(asctime)-15s %(levelname)s: %(message)s'
    logging.basicConfig(format=log_format, level=opts.loglevel.upper())

    # "-" is mapped onto the /dev/fd pseudo-files, so this won't work on
    # Windows.
    source_path = opts.text_path
    if source_path == '-':
        source_path = '/dev/fd/0'

    target_path = opts.concrete_path
    if target_path == '-':
        target_path = '/dev/fd/1'

    with codecs.open(source_path, encoding='utf-8') as text_file:
        text = text_file.read()
        comm = create_comm(source_path, text,
                           annotation_level=opts.annotation_level)
        write_communication_to_file(comm, target_path)
Exemple #2
0
def main():
    """Convert a UTF-8 text file into a communication (CLI entry point).

    Reads the two positional paths (plus the annotation-level and version
    options registered by the helper functions) from the command line,
    loads the full text, passes it to ``create_comm`` and hands the result
    to ``write_communication_to_file``.  ``-`` stands for stdin/stdout.
    """
    arg_parser = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    arg_parser.set_defaults(annotation_level=AL_NONE)
    arg_parser.add_argument(
        'text_path', type=str,
        help='Input text file path (- for stdin)',
    )
    arg_parser.add_argument(
        'concrete_path', type=str,
        help='Output concrete file path (- for stdout)',
    )
    add_annotation_level_argparse_argument(arg_parser)
    concrete.version.add_argparse_argument(arg_parser)
    options = arg_parser.parse_args()

    # The /dev/fd pseudo-files stand in for stdin/stdout; this won't work on
    # Windows... but that use case is very unlikely.
    if options.text_path == '-':
        in_path = '/dev/fd/0'
    else:
        in_path = options.text_path

    if options.concrete_path == '-':
        out_path = '/dev/fd/1'
    else:
        out_path = options.concrete_path

    level = options.annotation_level

    with codecs.open(in_path, encoding='utf-8') as reader:
        contents = reader.read()
        comm = create_comm(in_path, contents, annotation_level=level)
        write_communication_to_file(comm, out_path)
Exemple #3
0
def main():
    """Entry point for the text-file-to-communication command-line tool.

    Builds the argument parser, resolves ``-`` to the stdin/stdout
    pseudo-files, decodes the input as UTF-8, creates a communication with
    ``create_comm`` and writes it using ``write_communication_to_file``.
    """
    parser_kwargs = dict(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='Convert text file to communication',
    )
    argp = ArgumentParser(**parser_kwargs)
    argp.set_defaults(annotation_level=AL_NONE)
    argp.add_argument('text_path',
                      type=str,
                      help='Input text file path (- for stdin)')
    argp.add_argument('concrete_path',
                      type=str,
                      help='Output concrete file path (- for stdout)')
    add_annotation_level_argparse_argument(argp)
    concrete.version.add_argparse_argument(argp)
    parsed = argp.parse_args()

    # Won't work on Windows... but that use case is very unlikely
    input_path = parsed.text_path
    output_path = parsed.concrete_path
    if input_path == '-':
        input_path = '/dev/fd/0'
    if output_path == '-':
        output_path = '/dev/fd/1'

    with codecs.open(input_path, encoding='utf-8') as handle:
        raw_text = handle.read()
        communication = create_comm(
            input_path,
            raw_text,
            annotation_level=parsed.annotation_level,
        )
        write_communication_to_file(communication, output_path)
Exemple #4
0
def main():
    """Command-line entry point: convert a tarball of text files to a
    tarball of concrete communications.

    Parses the command line, configures the root logger at the requested
    level, then iterates over the communications produced by ``load`` and
    writes each one to a ``CommunicationWriterTGZ`` under its ``id``.  An
    info message is logged every ``--log-interval`` documents.  A path of
    ``-`` selects stdin (for input) or stdout (for output).

    Exits through ``parser.error`` (usage message, status 2) if
    ``--log-interval`` is not a positive integer.
    """
    set_stdout_encoding()

    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='Convert tarball of text files to'
                    ' tarball of concrete communications',
    )
    # The log-level option below stores into ``loglevel`` and carries its
    # own default, so only these two defaults are needed here.
    parser.set_defaults(annotation_level=AL_NONE, log_interval=1000)
    parser.add_argument('text_tarball_path', type=str,
                        help='Input text tar file path (- for stdin)')
    parser.add_argument('concrete_tarball_path', type=str,
                        help='Output concrete tar file path (- for stdout)')
    parser.add_argument('--per-line', action='store_true',
                        help='Text files have one document per line (default:'
                             ' each text file is a document)')
    parser.add_argument('--log-interval', type=int,
                        help='Log an info message every log-interval docs')
    add_annotation_level_argparse_argument(parser)
    parser.add_argument('-l', '--loglevel', '--log-level',
                        help='Logging verbosity level threshold (to stderr)',
                        default='info')
    concrete.version.add_argparse_argument(parser)
    args = parser.parse_args()

    # Reject a non-positive interval up front: zero would otherwise raise
    # ZeroDivisionError in the modulo below, after the output tarball has
    # already been opened.
    if args.log_interval < 1:
        parser.error('--log-interval must be a positive integer')

    # Won't work on Windows
    text_tarball_path = (
        '/dev/fd/0'
        if args.text_tarball_path == '-'
        else args.text_tarball_path
    )
    concrete_tarball_path = (
        '/dev/fd/1'
        if args.concrete_tarball_path == '-'
        else args.concrete_tarball_path
    )
    per_line = args.per_line
    annotation_level = args.annotation_level

    logging.basicConfig(format='%(asctime)-15s %(levelname)s: %(message)s',
                        level=args.loglevel.upper())

    with CommunicationWriterTGZ(concrete_tarball_path) as writer:
        comms = load(text_tarball_path, per_line, annotation_level)
        # Count documents from 1 so the logged number is the document count.
        for (doc_num, comm) in enumerate(comms, 1):
            if doc_num % args.log_interval == 0:
                # Pass arguments lazily so the message is only formatted
                # when the info level is actually enabled.
                logging.info(u'writing doc %d (%s)...', doc_num, comm.id)
            writer.write(comm, comm.id)
def main():
    """Run the text-file-to-communication converter from the command line.

    After parsing arguments and configuring the root logger, the input file
    is decoded as UTF-8 and read in full; ``create_comm`` turns the text
    into a communication, which ``write_communication_to_file`` then writes
    to the output path.  ``-`` means stdin for input and stdout for output.
    """
    set_stdout_encoding()

    argument_parser = ArgumentParser(
        description='Convert text file to communication',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    argument_parser.set_defaults(annotation_level=AL_NONE)
    argument_parser.add_argument(
        'text_path', type=str,
        help='Input text file path (- for stdin)')
    argument_parser.add_argument(
        'concrete_path', type=str,
        help='Output concrete file path (- for stdout)')
    add_annotation_level_argparse_argument(argument_parser)
    argument_parser.add_argument(
        '-l', '--loglevel', '--log-level',
        default='info',
        help='Logging verbosity level threshold (to stderr)')
    concrete.version.add_argparse_argument(argument_parser)
    namespace = argument_parser.parse_args()

    threshold = namespace.loglevel.upper()
    logging.basicConfig(
        level=threshold,
        format='%(asctime)-15s %(levelname)s: %(message)s',
    )

    # Won't work on Windows: "-" is translated to the /dev/fd pseudo-files.
    if namespace.text_path == '-':
        src = '/dev/fd/0'
    else:
        src = namespace.text_path

    if namespace.concrete_path == '-':
        dst = '/dev/fd/1'
    else:
        dst = namespace.concrete_path

    with codecs.open(src, encoding='utf-8') as infile:
        body = infile.read()
        comm = create_comm(src, body,
                           annotation_level=namespace.annotation_level)
        write_communication_to_file(comm, dst)