def main():
    """Convert an annotation file from one supported Note format to another.

    Command-line flags:
        -txt          path of the text file the annotations refer to
        -annotations  path of the annotation file; its extension selects
                      the input format
        -out          optional path to write the result to (stdout if
                      omitted)
        -format       output format, one of Note.supportedFormats()

    Exits with status 1 when the text file is missing or does not exist,
    2 when the annotation file is missing, does not exist, or has an
    unsupported extension, and 3 when the output format is missing or
    unsupported.
    """

    def _die(status, *lines):
        # Emit every message line, then a blank line, on stderr and exit.
        # sys.stderr.write replaces the Python 2-only `print >>` statement
        # so the same bytes are produced on Python 2 and Python 3.
        for line in lines:
            sys.stderr.write(line + '\n')
        sys.stderr.write('\n')
        sys.exit(status)

    # Argument Parser
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-txt",
        dest="txt",
        help="The files that contain the training examples",
    )

    parser.add_argument(
        "-annotations",
        dest="annotations",
        help="The files that contain the labels for the training examples",
    )

    # NOTE: despite the help text, this path is opened as a regular file
    # for writing further down.
    parser.add_argument(
        "-out",
        dest="out",
        default=None,
        help="Directory to output data",
    )

    parser.add_argument(
        "-format",
        dest="format",
        help="Output format (%s)" % ' or '.join(Note.supportedFormats()),
    )

    # Parse the command line arguments
    args = parser.parse_args()

    txt = args.txt
    annotations = args.annotations
    out_file = args.out
    out_format = args.format  # renamed locally to avoid shadowing format()

    # Ensure the text file is specified and exists
    if not txt:
        _die(1, '\n\tError: Must supply text file')
    if not os.path.exists(txt):
        _die(1, '\n\tError: Given text file does not exist')

    # Ensure the annotation file is specified, exists, and has a supported
    # extension. (The existence check previously tested `txt` by mistake.)
    extensions = Note.supportedFormatExtensions()
    in_extension = os.path.splitext(annotations)[1][1:] if annotations else ''
    if not annotations:
        _die(2, '\n\tError: Must supply annotations')
    if not os.path.exists(annotations):
        _die(2, '\n\tError: Given annotation file does not exist')
    if in_extension not in extensions:
        _die(2,
             '\n\tError: annotation must be a supported format',
             '\t\t(.%s)' % ' or .'.join(extensions))

    # Ensure output format is specified
    if (not out_format) or (out_format not in Note.supportedFormats()):
        _die(3,
             '\n\tError: Must specify supported output format',
             '\t\t(%s)' % ' or '.join(Note.supportedFormats()))

    # Automatically find the input file format (last matching entry wins,
    # as before).
    in_format = None
    for fmt, ext in Note.dictOfFormatToExtensions().items():
        if ext == in_extension:
            in_format = fmt
    if in_format is None:
        # Guard against the extension list and the format->extension map
        # disagreeing, instead of failing later with a NameError.
        _die(2,
             '\n\tError: annotation must be a supported format',
             '\t\t(.%s)' % ' or .'.join(extensions))

    # Read input data into note object
    in_note = Note(in_format)
    in_note.read(txt, annotations)

    # Convert data to standard format
    internal_output = in_note.write_standard()

    # tmp_dir is not defined in this function; it is expected to be
    # provided at module level.
    os_handle, tmp_file = tempfile.mkstemp(dir=tmp_dir, suffix="format_temp")
    try:
        # Wrap the descriptor mkstemp already opened rather than opening
        # the path a second time; the with-block closes it.
        with os.fdopen(os_handle, 'w') as tmp_f:
            tmp_f.write(internal_output)

        # Read internal standard data into a note of the requested format
        out_note = Note(out_format)
        out_note.read_standard(txt, tmp_file)

        # Output data
        out = out_note.write()
        if out_file:
            with open(out_file, 'w') as out_f:
                out_f.write(out)
        else:
            sys.stdout.write(out)
    finally:
        # Clean up the temporary file even when conversion fails.
        os.remove(tmp_file)
# Example #2
# 0
def main():
    """Convert an annotation file from one supported Note format to another.

    Command-line flags:
        -t  path of the text file the annotations refer to
        -a  path of the annotation file; its extension selects the input
            format
        -o  optional path to write the result to (stdout if omitted)
        -f  output format, one of Note.supportedFormats()

    Exits with status 1 when the text file is missing or does not exist,
    2 when the annotation file is missing, does not exist, or has an
    unsupported extension, and 3 when the output format is missing or
    unsupported.
    """

    def _die(status, *lines):
        # Emit every message line, then a blank line, on stderr and exit.
        # sys.stderr.write replaces the Python 2-only `print >>` statement
        # so the same bytes are produced on Python 2 and Python 3.
        for line in lines:
            sys.stderr.write(line + '\n')
        sys.stderr.write('\n')
        sys.exit(status)

    # Argument Parser
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-t",
        dest="txt",
        help="The files that contain the training examples",
    )

    parser.add_argument(
        "-a",
        dest="annotations",
        help="The files that contain the labels for the training examples",
    )

    # NOTE: despite the help text, this path is opened as a regular file
    # for writing further down.
    parser.add_argument(
        "-o",
        dest="out",
        default=None,
        help="Directory to output data",
    )

    parser.add_argument(
        "-f",
        dest="format",
        help="Output format (%s)" % ' or '.join(Note.supportedFormats()),
    )

    # Parse the command line arguments
    args = parser.parse_args()

    txt = args.txt
    annotations = args.annotations
    out_file = args.out
    out_format = args.format  # renamed locally to avoid shadowing format()

    # Ensure the text file is specified and exists
    if not txt:
        _die(1, '\n\tError: Must supply text file')
    if not os.path.exists(txt):
        _die(1, '\n\tError: Given text file does not exist')

    # Ensure the annotation file is specified, exists, and has a supported
    # extension. (The existence check previously tested `txt` by mistake.)
    extensions = Note.supportedFormatExtensions()
    in_extension = os.path.splitext(annotations)[1][1:] if annotations else ''
    if not annotations:
        _die(2, '\n\tError: Must supply annotations')
    if not os.path.exists(annotations):
        _die(2, '\n\tError: Given annotation file does not exist')
    if in_extension not in extensions:
        _die(2,
             '\n\tError: annotation must be a supported format',
             '\t\t(.%s)' % ' or .'.join(extensions))

    # Ensure output format is specified
    if (not out_format) or (out_format not in Note.supportedFormats()):
        _die(3,
             '\n\tError: Must specify supported output format',
             '\t\t(%s)' % ' or '.join(Note.supportedFormats()))

    # Automatically find the input file format (last matching entry wins,
    # as before).
    in_format = None
    for fmt, ext in Note.dictOfFormatToExtensions().items():
        if ext == in_extension:
            in_format = fmt
    if in_format is None:
        # Guard against the extension list and the format->extension map
        # disagreeing, instead of failing later with a NameError.
        _die(2,
             '\n\tError: annotation must be a supported format',
             '\t\t(.%s)' % ' or .'.join(extensions))

    # Read input data into note object
    in_note = Note(in_format)
    in_note.read(txt, annotations)

    # Convert data to standard format
    internal_output = in_note.write_standard()

    # tmp_dir is not defined in this function; it is expected to be
    # provided at module level.
    os_handle, tmp_file = tempfile.mkstemp(dir=tmp_dir, suffix="format_temp")
    try:
        # Wrap the descriptor mkstemp already opened rather than opening
        # the path a second time; the with-block closes it.
        with os.fdopen(os_handle, 'w') as tmp_f:
            tmp_f.write(internal_output)

        # Read internal standard data into a note of the requested format
        out_note = Note(out_format)
        out_note.read_standard(txt, tmp_file)

        # Output data
        out = out_note.write()
        if out_file:
            with open(out_file, 'w') as out_f:
                out_f.write(out)
        else:
            sys.stdout.write(out)
    finally:
        # Clean up the temporary file even when conversion fails.
        os.remove(tmp_file)