Exemple #1
0
def main():
    """Parse the command line and hand the join request to ``join_files``.

    Positional arguments, in order:
    ``filename1 filename2 column1 column2 out_filename``.
    ``column1`` and ``column2`` are given 1-based on the command line and are
    converted to 0-based indices here. Extra positional arguments are ignored.

    Options:
        -b/--buffer: number of lines to buffer at a time (default 1000000).
        -d/--index_depth: depth for file-based offset indexing (default 3).
        -p/--keep_partial: keep rows of the first input missing identifiers.
        -u/--keep_unmatched: keep rows of the first input that were not joined.
        -f/--fill_options_file: path to a JSON file holding fill options.

    If the fill options file cannot be read or converted, a warning is printed
    to stdout and default fill options are used instead.

    Returns:
        The return value of ``join_files``.

    Raises:
        SystemExit: when a positional argument is missing or a column number
            is not an integer (after printing a message to stderr).
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '-b', '--buffer',
        dest='buffer',
        type='int', default=1000000,
        help='Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.'
    )
    parser.add_option(
        '-d', '--index_depth',
        dest='index_depth',
        type='int', default=3,
        help='Depth to use on filebased offset indexing. Default: 3.'
    )
    parser.add_option(
        '-p', '--keep_partial',
        action='store_true',
        dest='keep_partial',
        default=False,
        help='Keep rows in first input which are missing identifiers.')
    parser.add_option(
        '-u', '--keep_unmatched',
        action='store_true',
        dest='keep_unmatched',
        default=False,
        help='Keep rows in first input which are not joined with the second input.')
    parser.add_option(
        '-f', '--fill_options_file',
        dest='fill_options_file',
        type='str', default=None,
        help='Fill empty columns with a values from a JSONified file.')

    options, args = parser.parse_args()

    fill_options = None
    if options.fill_options_file is not None:
        # Best effort: any failure to read or convert the file only produces a
        # warning, and the defaults below are used instead.
        try:
            # The context manager closes the handle even if parsing fails.
            with open(options.fill_options_file) as fill_options_fh:
                fill_options = Bunch(**stringify_dictionary_keys(json.load(fill_options_fh)))
        except Exception as e:
            print("Warning: Ignoring fill options due to json error (%s)." % e)
    if fill_options is None:
        fill_options = Bunch()
    # Supply defaults for any fill option the file did not set.
    if 'fill_unjoined_only' not in fill_options:
        fill_options.fill_unjoined_only = True
    if 'file1_columns' not in fill_options:
        fill_options.file1_columns = None
    if 'file2_columns' not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        # Command-line columns are 1-based; convert to 0-based indices.
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except (IndexError, ValueError):
        # IndexError: too few positional arguments.
        # ValueError: a column argument is not an integer.
        print("Error parsing command line.", file=sys.stderr)
        # NOTE(review): sys.exit() without an argument exits with status 0;
        # kept as-is in case callers rely on it -- confirm whether a
        # non-zero status is wanted here.
        sys.exit()

    # Character for splitting fields and joining lines
    split = "\t"

    return join_files(
        filename1, column1, filename2, column2, out_filename, split,
        options.buffer, options.keep_unmatched, options.keep_partial,
        options.index_depth, fill_options=fill_options
    )
Exemple #2
0
    fill_options = None
    if options.fill_options_file is not None:
        try:
            fill_options = Bunch(
                **stringify_dictionary_keys(json.load(open(options.fill_options_file)))
            )  # json.load( open( options.fill_options_file ) )
        except Exception, e:
            print "Warning: Ignoring fill options due to json error (%s)." % e
    if fill_options is None:
        fill_options = Bunch()
    if "fill_unjoined_only" not in fill_options:
        fill_options.fill_unjoined_only = True
    if "file1_columns" not in fill_options:
        fill_options.file1_columns = None
    if "file2_columns" not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except:
        print >> sys.stderr, "Error parsing command line."
        sys.exit()

    # Character for splitting fields and joining lines
    split = "\t"

    return join_files(
Exemple #3
0
    fill_options = None
    if options.fill_options_file is not None:
        try:
            fill_options = Bunch(**stringify_dictionary_keys(
                json.load(open(options.fill_options_file))
            ))  # json.load( open( options.fill_options_file ) )
        except Exception, e:
            print "Warning: Ignoring fill options due to json error (%s)." % e
    if fill_options is None:
        fill_options = Bunch()
    if 'fill_unjoined_only' not in fill_options:
        fill_options.fill_unjoined_only = True
    if 'file1_columns' not in fill_options:
        fill_options.file1_columns = None
    if 'file2_columns' not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except:
        print >> sys.stderr, "Error parsing command line."
        sys.exit()

    # Character for splitting fields and joining lines
    split = "\t"

    return join_files(filename1,
Exemple #4
0
def main():
    """Parse the command line and hand the join request to ``join_files``.

    Positional arguments, in order:
    ``filename1 filename2 column1 column2 out_filename``.
    ``column1`` and ``column2`` are given 1-based on the command line and are
    converted to 0-based indices here. Extra positional arguments are ignored.

    Options:
        -b/--buffer: number of lines to buffer at a time (default 1000000).
        -d/--index_depth: depth for file-based offset indexing (default 3).
        -p/--keep_partial: keep rows of the first input missing identifiers.
        -u/--keep_unmatched: keep rows of the first input that were not joined.
        -f/--fill_options_file: path to a JSON file holding fill options.
        -H/--keep_headers: keep the headers.

    If the fill options file cannot be read or converted, a warning is printed
    to stdout and default fill options are used instead.

    Returns:
        The return value of ``join_files``.

    Raises:
        SystemExit: when a positional argument is missing or a column number
            is not an integer (after printing a message to stderr).
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '-b', '--buffer',
        dest='buffer',
        type='int', default=1000000,
        help='Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.'
    )
    parser.add_option(
        '-d', '--index_depth',
        dest='index_depth',
        type='int', default=3,
        help='Depth to use on filebased offset indexing. Default: 3.'
    )
    parser.add_option(
        '-p', '--keep_partial',
        action='store_true',
        dest='keep_partial',
        default=False,
        help='Keep rows in first input which are missing identifiers.')
    parser.add_option(
        '-u', '--keep_unmatched',
        action='store_true',
        dest='keep_unmatched',
        default=False,
        help='Keep rows in first input which are not joined with the second input.')
    parser.add_option(
        '-f', '--fill_options_file',
        dest='fill_options_file',
        type='str', default=None,
        help='Fill empty columns with a values from a JSONified file.')
    parser.add_option(
        '-H', '--keep_headers',
        action='store_true',
        dest='keep_headers',
        default=False,
        help='Keep the headers')

    options, args = parser.parse_args()

    fill_options = None
    if options.fill_options_file is not None:
        # Best effort: any failure to read or convert the file only produces a
        # warning, and the defaults below are used instead.
        try:
            # The context manager closes the handle even if parsing fails.
            with open(options.fill_options_file) as fill_options_fh:
                fill_options = Bunch(**stringify_dictionary_keys(json.load(fill_options_fh)))
        except Exception as e:
            print("Warning: Ignoring fill options due to json error (%s)." % e)
    if fill_options is None:
        fill_options = Bunch()
    # Supply defaults for any fill option the file did not set.
    if 'fill_unjoined_only' not in fill_options:
        fill_options.fill_unjoined_only = True
    if 'file1_columns' not in fill_options:
        fill_options.file1_columns = None
    if 'file2_columns' not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        # Command-line columns are 1-based; convert to 0-based indices.
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except (IndexError, ValueError):
        # IndexError: too few positional arguments.
        # ValueError: a column argument is not an integer.
        print("Error parsing command line.", file=sys.stderr)
        # NOTE(review): sys.exit() without an argument exits with status 0;
        # kept as-is in case callers rely on it -- confirm whether a
        # non-zero status is wanted here.
        sys.exit()

    # Character for splitting fields and joining lines
    split = "\t"

    return join_files(
        filename1, column1, filename2, column2, out_filename, split,
        options.buffer, options.keep_unmatched, options.keep_partial,
        options.keep_headers, options.index_depth, fill_options=fill_options
    )