# These snippets depend on helpers from their surrounding project (they match
# the meliae memory profiler's loader and strip-duplicates code):
# `files.open_file`, `strip_duplicate`, `_load` and `loader.iter_objs` are
# assumed to be provided there.  The code is Python 2 (it relies on the `file`
# builtin and optparse); the imports below are what the snippets themselves need.
import os
import sys
import time

try:
    import simplejson
except ImportError:
    simplejson = None


def main(args):
    import optparse
    p = optparse.OptionParser(
        '%prog [INFILE [OUTFILE]]')

    opts, args = p.parse_args(args)
    if len(args) > 2:
        sys.stderr.write('We only support 2 filenames, not %d\n' % (len(args),))
        return -1

    cleanups = []
    try:
        if len(args) == 0:
            infile = sys.stdin
            insize = None
            outfile = sys.stdout
        else:
            infile, cleanup = files.open_file(args[0])
            if cleanup is not None:
                cleanups.append(cleanup)
            if isinstance(infile, file):
                # pipes are files, but 0 isn't useful.
                insize = os.fstat(infile.fileno()).st_size or None
            else:
                insize = None
            if len(args) == 1:
                outfile = sys.stdout
            else:
                outfile = open(args[1], 'wb')
                # The explicitly opened output file should be closed as well.
                cleanups.append(outfile.close)
        strip_duplicate(infile, outfile, insize)
    finally:
        for cleanup in cleanups:
            cleanup()
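
# A minimal sketch of wiring main() up as a command-line entry point; the
# __main__ guard below is an assumption, not part of the original snippet.
if __name__ == '__main__':
    # Hand everything after the program name to main() and use its return
    # value as the process exit status.
    sys.exit(main(sys.argv[1:]))

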
def load(source,
         using_json=None,
         show_prog=True,
         collapse=True,
         max_parents=None):
    """Load objects from the given source.

    :param source: If this is a string, we will open it as a file and read all
        objects. For any other type, we will simply iterate and parse objects
        out, so the object should be an iterator of json lines.
    :param using_json: Use simplejson rather than the regex. This allows
        arbitrary ordered json dicts to be parsed but still requires per-line
        layout. Set to 'False' to indicate you want to use the regex, set to
        'True' to force using simplejson. None will probe to see if simplejson
        is available, and use it if it is. (With _speedups built, simplejson
        parses faster and more accurately than the regex.)
    :param show_prog: If True, display the progress as we read in data
    :param collapse: If True, run collapse_instance_dicts() after loading.
    :param max_parents: See ObjManager.__init__(max_parents)
    """
    cleanup = None
    if isinstance(source, str):
        source, cleanup = files.open_file(source)
        if isinstance(source, file):
            input_size = os.fstat(source.fileno()).st_size
        else:
            input_size = 0
    elif isinstance(source, (list, tuple)):
        input_size = sum(map(len, source))
    else:
        input_size = 0
    if using_json is None:
        using_json = (simplejson is not None)
    try:
        manager = _load(source,
                        using_json,
                        show_prog,
                        input_size,
                        max_parents=max_parents)
    finally:
        if cleanup is not None:
            cleanup()
    if collapse:
        tstart = time.time()
        if not manager.collapse_instance_dicts():
            manager.compute_parents()
        if show_prog:
            tend = time.time()
            sys.stderr.write('collapsed in %.1fs\n' % (tend - tstart, ))
    return manager
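
# A short usage sketch of load(): the dump filename is illustrative, and the
# two manager methods shown are the ones already used in the function body
# above.
manager = load('objs.json', show_prog=False, collapse=False)
if not manager.collapse_instance_dicts():
    manager.compute_parents()

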
def source():
    # Yield parsed objects lazily; run the cleanup callback (which may be
    # None) even if iteration stops early.
    infile, cleanup = files.open_file(args[0])
    try:
        for obj in loader.iter_objs(infile):
            yield obj
    finally:
        if cleanup is not None:
            cleanup()
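
# Because source() is a generator, the input file is opened only when
# iteration starts and objects stream through one at a time.  Illustrative
# consumer (the counting loop is not from the original code):
count = 0
for obj in source():
    count += 1
sys.stderr.write('read %d objects\n' % (count,))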