def main():
    """Parse the command line and hand the work to ``join_files``.

    Expected positional arguments, in order:
    ``filename1 filename2 column1 column2 out_filename``. The two column
    arguments are given 1-based on the command line and are converted to
    0-based indexes before being passed on.

    Options control the line buffer size, the index depth, whether partial
    or unmatched rows of the first input are kept, and an optional JSON file
    of fill options. A fill-options file that cannot be read or parsed is
    ignored with a warning.

    Returns:
        Whatever ``join_files`` returns.

    Raises:
        SystemExit: with status 1 when a positional argument is missing or a
            column argument is not an integer.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '-b', '--buffer',
        dest='buffer',
        type='int', default=1000000,
        help='Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.'
    )
    parser.add_option(
        '-d', '--index_depth',
        dest='index_depth',
        type='int', default=3,
        help='Depth to use on filebased offset indexing. Default: 3.'
    )
    parser.add_option(
        '-p', '--keep_partial',
        action='store_true',
        dest='keep_partial',
        default=False,
        help='Keep rows in first input which are missing identifiers.'
    )
    parser.add_option(
        '-u', '--keep_unmatched',
        action='store_true',
        dest='keep_unmatched',
        default=False,
        help='Keep rows in first input which are not joined with the second input.'
    )
    parser.add_option(
        '-f', '--fill_options_file',
        dest='fill_options_file',
        type='str', default=None,
        help='Fill empty columns with a values from a JSONified file.'
    )

    options, args = parser.parse_args()

    fill_options = None
    if options.fill_options_file is not None:
        try:
            # Use a context manager so the file handle is closed even when
            # the JSON is malformed.
            with open(options.fill_options_file) as handle:
                fill_options = Bunch(**stringify_dictionary_keys(json.load(handle)))
        except Exception as e:
            # Best effort: a bad fill-options file only disables filling.
            print("Warning: Ignoring fill options due to json error (%s)." % e)
    if fill_options is None:
        fill_options = Bunch()
    # Supply defaults for any fill settings the file did not provide.
    if 'fill_unjoined_only' not in fill_options:
        fill_options.fill_unjoined_only = True
    if 'file1_columns' not in fill_options:
        fill_options.file1_columns = None
    if 'file2_columns' not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        # Columns are 1-based on the command line, 0-based internally.
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except (IndexError, ValueError):
        # Missing positional argument or non-integer column: report it and
        # exit with a failing status so callers can detect the error.
        print("Error parsing command line.", file=sys.stderr)
        sys.exit(1)

    # Character for splitting fields and joining lines
    split = "\t"

    return join_files(
        filename1, column1, filename2, column2, out_filename, split,
        options.buffer, options.keep_unmatched, options.keep_partial,
        options.index_depth, fill_options=fill_options
    )
) options, args = parser.parse_args() fill_options = None if options.fill_options_file is not None: try: fill_options = Bunch( **stringify_dictionary_keys(json.load(open(options.fill_options_file))) ) # json.load( open( options.fill_options_file ) ) except Exception, e: print "Warning: Ignoring fill options due to json error (%s)." % e if fill_options is None: fill_options = Bunch() if "fill_unjoined_only" not in fill_options: fill_options.fill_unjoined_only = True if "file1_columns" not in fill_options: fill_options.file1_columns = None if "file2_columns" not in fill_options: fill_options.file2_columns = None try: filename1 = args[0] filename2 = args[1] column1 = int(args[2]) - 1 column2 = int(args[3]) - 1 out_filename = args[4] except: print >> sys.stderr, "Error parsing command line." sys.exit()
help='Fill empty columns with a values from a JSONified file.') options, args = parser.parse_args() fill_options = None if options.fill_options_file is not None: try: fill_options = Bunch(**stringify_dictionary_keys( json.load(open(options.fill_options_file)) )) # json.load( open( options.fill_options_file ) ) except Exception, e: print "Warning: Ignoring fill options due to json error (%s)." % e if fill_options is None: fill_options = Bunch() if 'fill_unjoined_only' not in fill_options: fill_options.fill_unjoined_only = True if 'file1_columns' not in fill_options: fill_options.file1_columns = None if 'file2_columns' not in fill_options: fill_options.file2_columns = None try: filename1 = args[0] filename2 = args[1] column1 = int(args[2]) - 1 column2 = int(args[3]) - 1 out_filename = args[4] except: print >> sys.stderr, "Error parsing command line." sys.exit()
def main():
    """Parse the command line and hand the work to ``join_files``.

    Expected positional arguments, in order:
    ``filename1 filename2 column1 column2 out_filename``. The two column
    arguments are given 1-based on the command line and are converted to
    0-based indexes before being passed on.

    Options control the line buffer size, the index depth, whether partial
    or unmatched rows of the first input are kept, whether headers are kept,
    and an optional JSON file of fill options. A fill-options file that
    cannot be read or parsed is ignored with a warning.

    Returns:
        Whatever ``join_files`` returns.

    Raises:
        SystemExit: with status 1 when a positional argument is missing or a
            column argument is not an integer.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '-b', '--buffer',
        dest='buffer',
        type='int', default=1000000,
        help='Number of lines to buffer at a time. Default: 1,000,000 lines. A buffer of 0 will attempt to use memory only.'
    )
    parser.add_option(
        '-d', '--index_depth',
        dest='index_depth',
        type='int', default=3,
        help='Depth to use on filebased offset indexing. Default: 3.'
    )
    parser.add_option(
        '-p', '--keep_partial',
        action='store_true',
        dest='keep_partial',
        default=False,
        help='Keep rows in first input which are missing identifiers.'
    )
    parser.add_option(
        '-u', '--keep_unmatched',
        action='store_true',
        dest='keep_unmatched',
        default=False,
        help='Keep rows in first input which are not joined with the second input.'
    )
    parser.add_option(
        '-f', '--fill_options_file',
        dest='fill_options_file',
        type='str', default=None,
        help='Fill empty columns with a values from a JSONified file.'
    )
    parser.add_option(
        '-H', '--keep_headers',
        action='store_true',
        dest='keep_headers',
        default=False,
        help='Keep the headers'
    )

    options, args = parser.parse_args()

    fill_options = None
    if options.fill_options_file is not None:
        try:
            # Use a context manager so the file handle is closed even when
            # the JSON is malformed.
            with open(options.fill_options_file) as handle:
                fill_options = Bunch(**stringify_dictionary_keys(json.load(handle)))
        except Exception as e:
            # Best effort: a bad fill-options file only disables filling.
            print("Warning: Ignoring fill options due to json error (%s)." % e)
    if fill_options is None:
        fill_options = Bunch()
    # Supply defaults for any fill settings the file did not provide.
    if 'fill_unjoined_only' not in fill_options:
        fill_options.fill_unjoined_only = True
    if 'file1_columns' not in fill_options:
        fill_options.file1_columns = None
    if 'file2_columns' not in fill_options:
        fill_options.file2_columns = None

    try:
        filename1 = args[0]
        filename2 = args[1]
        # Columns are 1-based on the command line, 0-based internally.
        column1 = int(args[2]) - 1
        column2 = int(args[3]) - 1
        out_filename = args[4]
    except (IndexError, ValueError):
        # Missing positional argument or non-integer column: report it and
        # exit with a failing status so callers can detect the error.
        print("Error parsing command line.", file=sys.stderr)
        sys.exit(1)

    # Character for splitting fields and joining lines
    split = "\t"

    return join_files(
        filename1, column1, filename2, column2, out_filename, split,
        options.buffer, options.keep_unmatched, options.keep_partial,
        options.keep_headers, options.index_depth, fill_options=fill_options
    )