Ejemplo n.º 1
0
def check_iofiles(read_filenames, write_filenames):
    """
    Check that input files can be read and output files can be created.

    Every name in `read_filenames` is opened for reading. If that fails, an
    error is printed with eprint() and the program exits with status 2.

    Every name in `write_filenames` must not already exist as a regular file
    (existing files are never overwritten) and must be openable for writing.
    Otherwise an error is printed and the program exits with status 1. The
    empty file created by a successful probe is removed with delete_files().

    Args:
        read_filenames: iterable of paths to probe for reading, or None.
        write_filenames: iterable of paths to probe for writing, or None.

    Raises:
        SystemExit: code 2 for an unreadable input, code 1 for an output
            that already exists or cannot be created.
    """
    if read_filenames is not None:
        for filename in read_filenames:
            try:
                with open(filename, 'r'):
                    pass
            except OSError:
                # OSError (not just FileNotFoundError) so that unreadable
                # files, directories, etc. are reported the same way
                # instead of escaping as a traceback.
                eprint(
                    "    => ERROR: Cannot open file '{}' for reading".format(
                        filename))
                sys.exit(2)

    if write_filenames is not None:
        for filename in write_filenames:
            if os.path.isfile(filename):
                eprint("    => ERROR: file '{}' exists".format(filename))
                eprint("       please remove it as we refuse to overwrite it!")
                sys.exit(1)
            try:
                with open(filename, 'w'):
                    pass
            except OSError:
                # Also covers a missing parent directory, a path that is a
                # directory, a read-only filesystem, and similar failures.
                eprint(
                    "    => ERROR: Cannot open file '{}' for writing".format(
                        filename))
                sys.exit(1)
            # The probe left an empty file behind: remove it again.
            delete_files([filename])
Ejemplo n.º 2
0
def check_files():
    """
    Check that input, output and temporary files can be opened.

    Works on the `args` object defined outside this function. Reads
    input_filename, encrypt, compress, threads, chunks_count,
    mt_subfiles_dir, mt_subfiles_iprefix and mt_subfiles_oprefix, and
    stores on it:
      - outff_filename: "<input>.enc" when encrypting, "<input>.xz" when
        only compressing, otherwise None;
      - chunk_itempfiles: per-chunk temporary filenames built from
        mt_subfiles_iprefix (only when threads > 1);
      - chunk_otempfiles: per-chunk temporary filenames built from
        mt_subfiles_oprefix (only when threads > 1 and an output flatfile
        is produced).

    Raises:
        SystemExit: when any file fails its check (via check_iofiles(), or
            code 1 for a temporary file that cannot be created).
    """

    def probe_chunk_tempfiles(prefix):
        """Create one '<prefix>.<NN>' file per chunk, delete them, return names."""
        # Zero-pad chunk numbers to the width of chunks_count, e.g. 00, 01..
        width = len(str(args.chunks_count))
        tempfiles = []
        for chunknum in range(args.chunks_count):
            chunk_tempfile = prefix + "." + str(chunknum).zfill(width)
            try:
                with open(chunk_tempfile, 'w'):
                    pass
            except OSError:
                # Any OS-level failure, not only PermissionError, gets the
                # same error message and exit code.
                eprint(
                    "    => ERROR: Cannot open temporary file '{}' for writing"
                    .format(chunk_tempfile))
                sys.exit(1)
            tempfiles.append(chunk_tempfile)
        # The files were only created as a probe: remove them, keep the names.
        delete_files(tempfiles)
        return tempfiles

    check_iofiles([args.input_filename], [])

    if args.encrypt:
        args.outff_filename = "{}.{}".format(args.input_filename, "enc")
    elif args.compress:  # compressed but not encrypted
        args.outff_filename = "{}.{}".format(args.input_filename, "xz")
    else:
        args.outff_filename = None

    if args.outff_filename is not None:
        check_iofiles([], [args.outff_filename])

    if args.threads > 1:  # multithread
        os.mkdir(args.mt_subfiles_dir, mode=0o700)
        args.chunk_itempfiles = probe_chunk_tempfiles(
            args.mt_subfiles_iprefix)

        # Output chunk files are only needed if an output flatfile is made.
        if args.compress or args.encrypt:
            args.chunk_otempfiles = probe_chunk_tempfiles(
                args.mt_subfiles_oprefix)
Ejemplo n.º 3
0
def check_files():
    """
    Check that input, output and temporary files can be opened.

    Works on the `args` object defined outside this function. Reads
    flatfile, output_filename, outindex_filename, threads, chunks_count,
    mt_subfiles_dir, mt_subfiles_iprefix and mt_subfiles_oprefix. When
    threads > 1 it creates mt_subfiles_dir and stores on `args`:
      - chunk_itempfiles: per-chunk temporary filenames built from
        mt_subfiles_iprefix;
      - chunk_otempfiles: per-chunk temporary filenames built from
        mt_subfiles_oprefix.

    Raises:
        SystemExit: when any file fails its check (via check_iofiles(), or
            code 1 for a temporary file that cannot be created).
    """

    def probe_chunk_tempfiles(prefix):
        """Create one '<prefix>.<NN>' file per chunk, delete them, return names."""
        # Zero-pad chunk numbers to the width of chunks_count, e.g. 00, 01..
        width = len(str(args.chunks_count))
        tempfiles = []
        for chunknum in range(args.chunks_count):
            chunk_tempfile = prefix + "." + str(chunknum).zfill(width)
            try:
                with open(chunk_tempfile, 'w'):
                    pass
            except OSError:
                # Any OS-level failure, not only PermissionError, gets the
                # same error message and exit code.
                eprint(
                    "    => ERROR: Cannot open temporary file '{}' for writing"
                    .format(chunk_tempfile))
                sys.exit(1)
            tempfiles.append(chunk_tempfile)
        # The files were only created as a probe: remove them, keep the names.
        delete_files(tempfiles)
        return tempfiles

    check_iofiles([args.flatfile], [])
    check_iofiles([], [args.output_filename, args.outindex_filename])

    if args.threads > 1:  # multithread
        os.mkdir(args.mt_subfiles_dir, mode=0o700)
        args.chunk_itempfiles = probe_chunk_tempfiles(
            args.mt_subfiles_iprefix)
        args.chunk_otempfiles = probe_chunk_tempfiles(
            args.mt_subfiles_oprefix)
Ejemplo n.º 4
0
        #init threads
        pool = Pool(args.threads,
                    initializer=init_thread,
                    initargs=(indexes_count, reindexed_count, deleted_count))
        #submit chunks to threads
        if args.progressbar:
            _ = list(
                tqdm(pool.imap(update_index_wrapper, range(args.chunks_count)),
                     total=args.chunks_count,
                     ascii=PROGRESSBARCHARS))
        else:
            pool.imap(update_index_wrapper, range(args.chunks_count))
        pool.close()  #no more work to submit
        pool.join()  #wait workers to finish

        delete_files(args.chunk_itempfiles)  #cleanup

        #final union of results: combine shifted index chunks
        with open(args.outindex_filename, 'wb') as outputfh:
            print_subfiles(args.chunk_otempfiles, outputfh)

        delete_files(args.chunk_otempfiles)  #cleanup
    else:
        indexes_count = [0]
        deleted_count = [0]
        reindexed_count = [0]
        update_index_wrapper(0)

    print_stats(start_secs)
Ejemplo n.º 5
0
            if args.verbose:
                eprint(" |-- new flatfile created: '{}'".format(
                    args.outff_filename))
    else:  #updatemode
        os.replace(args.outindex_filename, args.index_filename)
        if args.verbose:
            eprint(" '-- original indexfile updated: '{}'".format(
                args.index_filename))
        append_to_buffered(args.ff_filename, args.newentries_filename)
        if args.gzip:  #compress flatfile
            if args.verbose:
                eprint(
                    " |-- compressing updated flatfile.. this may take some time.."
                )
            args.ff_filename = compress_file(args.ff_filename,
                                             create_gzindex=True)
            if args.verbose:
                eprint(" |-- original flatfile updated and compressed: '{}'".
                       format(args.ff_filename))
        else:
            if args.verbose:
                eprint(" |-- original flatfile updated: '{}'".format(
                    args.ff_filename))

    #final cleanup, if desired
    if args.deleteafter:
        delete_files([args.newentries_filename, args.newindex_filename])

    #final printout
    eprint(" '-- merged {} new entries".format(new_identifiers_count))