def check_iofiles(read_filenames, write_filenames):
    """
    Check for ability to open input/output filenames.

    Each name in read_filenames is opened for reading as a probe; each name
    in write_filenames must not already exist and is created empty to prove
    writability, then removed again via delete_files().  Either argument
    may be None to skip that check.

    Exits the program instead of raising: status 2 for an unreadable
    input file, status 1 for an existing or unwritable output file.
    """
    if read_filenames is not None:
        for filename in read_filenames:
            try:
                # 'with' guarantees the probe handle is closed even on error;
                # OSError also covers PermissionError / IsADirectoryError,
                # not just a missing file, so every failure gets the clean
                # message + exit instead of a traceback
                with open(filename, 'r'):
                    pass
            except OSError:
                eprint(
                    " => ERROR: Cannot open file '{}' for reading".format(
                        filename))
                sys.exit(2)
    if write_filenames is not None:
        for filename in write_filenames:
            if os.path.isfile(filename):
                eprint(" => ERROR: file '{}' exists".format(filename))
                eprint(" please remove it as we refuse to overwrite it!")
                sys.exit(1)
            try:
                # create-and-close probe: proves the name is writable
                with open(filename, 'w'):
                    pass
            except OSError:
                eprint(
                    " => ERROR: Cannot open file '{}' for writing".format(
                        filename))
                sys.exit(1)
            #eprint("deleting {}".format(filename)) #DEBUG
            # remove the empty probe file again
            delete_files([filename])
def _make_chunk_tempfiles(prefix, chunks_count):
    """
    Probe writability of per-chunk temporary files named "<prefix>.NN".

    The numeric suffix is zero-padded to the width of chunks_count
    (e.g. 00, 01..) so the names sort lexicographically.  Each file is
    created empty; on the first failure the program exits with status 1.
    Returns the list of created filenames (callers delete the empty
    probe files right away — only writability needed proving).
    """
    tempfiles = list()
    suffix_width = len(str(chunks_count))
    for chunknum in range(chunks_count):
        chunk_tempfile = prefix + "." + str(chunknum).zfill(suffix_width)
        try:
            myoutputfh = open(chunk_tempfile, 'w')
        except PermissionError:
            eprint(" => ERROR: Cannot open temporary file '{}' for writing"
                   .format(chunk_tempfile))
            sys.exit(1)
        tempfiles.append(chunk_tempfile)  #store temp filenames
        myoutputfh.close()
    return tempfiles


def check_files():
    """ check for ability to open input/output filenames """
    check_iofiles([args.input_filename], [])
    # derive the output flatfile name from the requested transformation
    args.outff_filename = None
    if args.encrypt:
        args.outff_filename = "{}.{}".format(args.input_filename, "enc")
    elif args.compress:  #if compressed but not encrypted
        args.outff_filename = "{}.{}".format(args.input_filename, "xz")
    if args.outff_filename is not None:
        check_iofiles([], [args.outff_filename])
    if args.threads > 1:  #multithread
        # private scratch dir for the per-chunk temporary files
        os.mkdir(args.mt_subfiles_dir, mode=0o700)
        args.chunk_itempfiles = _make_chunk_tempfiles(
            args.mt_subfiles_iprefix, args.chunks_count)
        delete_files(args.chunk_itempfiles)
        #for chunknum in range(args.chunks_count): #DEBUG!
        #    eprint(" >> the temporary index file for chunk #{} will be '{}'".format(
        #        chunknum, args.chunk_itempfiles[chunknum]))
        #if outff needs to be generated
        if args.compress or args.encrypt:
            args.chunk_otempfiles = _make_chunk_tempfiles(
                args.mt_subfiles_oprefix, args.chunks_count)
            delete_files(args.chunk_otempfiles)
def _make_chunk_tempfiles(prefix, chunks_count):
    """
    Probe writability of per-chunk temporary files named "<prefix>.NN".

    The numeric suffix is zero-padded to the width of chunks_count
    (e.g. 00, 01..) so the names sort lexicographically.  Each file is
    created empty; on the first failure the program exits with status 1.
    Returns the list of created filenames (callers delete the empty
    probe files right away — only writability needed proving).
    """
    tempfiles = list()
    suffix_width = len(str(chunks_count))
    for chunknum in range(chunks_count):
        chunk_tempfile = prefix + "." + str(chunknum).zfill(suffix_width)
        try:
            myoutputfh = open(chunk_tempfile, 'w')
        except PermissionError:
            eprint(" => ERROR: Cannot open temporary file '{}' for writing"
                   .format(chunk_tempfile))
            sys.exit(1)
        tempfiles.append(chunk_tempfile)  #store temp filenames
        myoutputfh.close()
    return tempfiles


def check_files():
    """ do some checks on availability of resources """
    check_iofiles([args.flatfile], [])
    check_iofiles([], [args.output_filename, args.outindex_filename])
    if args.threads > 1:  #multithread
        # private scratch dir for the per-chunk temporary files
        os.mkdir(args.mt_subfiles_dir, mode=0o700)
        args.chunk_itempfiles = _make_chunk_tempfiles(
            args.mt_subfiles_iprefix, args.chunks_count)
        delete_files(args.chunk_itempfiles)
        args.chunk_otempfiles = _make_chunk_tempfiles(
            args.mt_subfiles_oprefix, args.chunks_count)
        delete_files(args.chunk_otempfiles)
#init threads pool = Pool(args.threads, initializer=init_thread, initargs=(indexes_count, reindexed_count, deleted_count)) #submit chunks to threads if args.progressbar: _ = list( tqdm(pool.imap(update_index_wrapper, range(args.chunks_count)), total=args.chunks_count, ascii=PROGRESSBARCHARS)) else: pool.imap(update_index_wrapper, range(args.chunks_count)) pool.close() #no more work to submit pool.join() #wait workers to finish delete_files(args.chunk_itempfiles) #cleanup #final union of results: combine shifted index chunks with open(args.outindex_filename, 'wb') as outputfh: print_subfiles(args.chunk_otempfiles, outputfh) delete_files(args.chunk_otempfiles) #cleanup else: indexes_count = [0] deleted_count = [0] reindexed_count = [0] update_index_wrapper(0) print_stats(start_secs)
# NOTE(review): this span is a fragment of a larger function — the branch
# the first statement belongs to, and the condition answered by the
# "else: #updatemode" below, are outside the visible region; the
# indentation here is reconstructed.
    if args.verbose:
        eprint(" |-- new flatfile created: '{}'".format(
            args.outff_filename))
else:  #updatemode
    # atomically swap the freshly built index over the original one
    os.replace(args.outindex_filename, args.index_filename)
    if args.verbose:
        eprint(" '-- original indexfile updated: '{}'".format(
            args.index_filename))
    # presumably appends the new entries onto the existing flatfile
    # (see append_to_buffered's definition) — confirm
    append_to_buffered(args.ff_filename, args.newentries_filename)
    if args.gzip:  #compress flatfile
        if args.verbose:
            eprint(
                " |-- compressing updated flatfile.. this may take some time.."
            )
        # compress_file returns the new (compressed) filename
        args.ff_filename = compress_file(args.ff_filename,
                                         create_gzindex=True)
        if args.verbose:
            eprint(" |-- original flatfile updated and compressed: '{}'".
                   format(args.ff_filename))
    else:
        if args.verbose:
            eprint(" |-- original flatfile updated: '{}'".format(
                args.ff_filename))
#final cleanup, if desired
if args.deleteafter:
    delete_files([args.newentries_filename, args.newindex_filename])
#final printout
eprint(" '-- merged {} new entries".format(new_identifiers_count))