Beispiel #1
0
def cli_lines_with_dir(input_):
    """Run ``main`` into a scratch directory and summarise the cdx it wrote.

    ``main`` is called with ``[<temp dir>, input_]``.  The name of the
    expected output file is derived with ``cdx_filename`` from the base
    name of ``input_`` and printed.  Up to the first 8192 bytes of that
    file are then read and split into lines.  The temporary directory is
    always removed afterwards.

    Prints the line at index 1, the last line, and the number of lines
    read (presumably index 0 is a header line -- TODO confirm).
    """
    cdx_lines = None
    work_dir = tempfile.mkdtemp()

    try:
        main([work_dir, input_])

        cdx_name = cdx_filename(os.path.basename(input_))
        print(cdx_name)

        cdx_path = os.path.join(work_dir, cdx_name)
        with open(cdx_path, 'rb') as cdx_file:
            head = cdx_file.read(8192)

        cdx_lines = head.rstrip().split(b'\n')
    finally:
        try:
            if work_dir:
                shutil.rmtree(work_dir)
        except OSError as err:
            # errno 2 is ENOENT: a directory that is already gone is fine,
            # anything else is re-raised.
            if err.errno != 2:
                raise

    if not cdx_lines:
        return

    # Line at index 1, then the last line, then the line count.
    for raw in (cdx_lines[1], cdx_lines[-1]):
        print(raw.decode('utf-8'))
    print('Total: ' + str(len(cdx_lines)))
Beispiel #2
0
def cli_lines_with_dir(input_):
    """Run ``main`` into a temporary directory and summarise the cdx output.

    ``main`` is invoked with ``[<temp dir>, input_]``.  The expected output
    file name is computed by ``cdx_filename`` from the base name of
    ``input_`` and printed.  Up to the first 8192 bytes of that file are
    read in binary mode and split into lines.  The temporary directory is
    removed in all cases.

    Prints the line at index 1, the last line, and the number of lines
    read (presumably index 0 is a header line -- TODO confirm).

    Works on both Python 2 and Python 3: output goes through the ``print``
    function-call form and the binary file content is split on a bytes
    separator and decoded before printing.
    """
    lines = None
    tmp_dir = None
    try:
        tmp_dir = tempfile.mkdtemp()

        main([tmp_dir, input_])

        filename = cdx_filename(os.path.basename(input_))

        # Single-argument print(...) behaves the same on Python 2 and 3,
        # whereas the statement form is a SyntaxError on Python 3.
        print(filename)

        with open(os.path.join(tmp_dir, filename), 'rb') as fh:
            # The file is opened in binary mode, so the separator must be
            # bytes; a str separator raises TypeError on Python 3.
            lines = fh.read(8192).rstrip().split(b'\n')

    finally:
        try:
            if tmp_dir:
                shutil.rmtree(tmp_dir)
        except OSError as exc:
            # errno 2 is ENOENT: ignore a directory that is already gone.
            if exc.errno != 2:
                raise

    if not lines:
        return

    # Print the line at index 1, the last line and the line count.
    # Decode so Python 3 prints text rather than a bytes repr.
    print(lines[1].decode('utf-8'))
    print(lines[-1].decode('utf-8'))
    print('Total: ' + str(len(lines)))
Beispiel #3
0
def cli_lines(cmds):
    """Run ``main(cmds)`` with ``sys.stdout.buffer`` captured and summarise it.

    ``sys.stdout.buffer`` is temporarily replaced with an in-memory
    ``BytesIO`` while ``main`` runs, and restored afterwards even if
    ``main`` raises.  If ``sys.stdout`` had no ``buffer`` attribute
    beforehand, the attribute is left set to ``None`` afterwards.

    Prints the captured line at index 1, the last line, and the number of
    captured lines (presumably index 0 is a header line -- TODO confirm).
    """
    buff = BytesIO()
    orig = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else None
    sys.stdout.buffer = buff
    try:
        main(cmds)
    finally:
        # Restore unconditionally so a failing main() cannot leave the
        # process writing into the capture buffer.
        sys.stdout.buffer = orig
    lines = buff.getvalue().rstrip().split(b'\n')

    # Print the line at index 1, the last line and the line count.
    print(lines[1].decode('utf-8'))
    print(lines[-1].decode('utf-8'))
    print('Total: ' + str(len(lines)))
Beispiel #4
0
def cli_lines(cmds):
    """Run ``main(cmds)`` with ``sys.stdout`` captured and summarise it.

    ``sys.stdout`` is temporarily replaced with an in-memory ``BytesIO``
    while ``main`` runs, and restored afterwards even if ``main`` raises.
    NOTE(review): this assumes ``main`` writes bytes-compatible data to
    ``sys.stdout`` (true for Python 2 ``str``) -- confirm for Python 3.

    Prints the captured line at index 1, the last line, and the number of
    captured lines (presumably index 0 is a header line -- TODO confirm).
    """
    buff = BytesIO()
    orig = sys.stdout
    sys.stdout = buff
    try:
        main(cmds)
    finally:
        # Restore unconditionally; otherwise an exception in main() would
        # leave every later print going into the capture buffer.
        sys.stdout = orig

    # BytesIO.getvalue() returns bytes, so split on a bytes separator
    # (a str separator raises TypeError on Python 3).
    lines = buff.getvalue().rstrip().split(b'\n')

    # Print the line at index 1, the last line and the line count.
    # Decode so Python 3 prints text rather than a bytes repr.
    print(lines[1].decode('utf-8'))
    print(lines[-1].decode('utf-8'))
    print('Total: ' + str(len(lines)))
Beispiel #5
0
def cli_lines(cmds):
    """Capture what ``main(cmds)`` writes to ``sys.stdout.buffer`` and summarise it.

    While ``main`` runs, ``sys.stdout.buffer`` points at an in-memory
    ``BytesIO``.  The previous value is put back afterwards, including
    when ``main`` raises.  If ``sys.stdout`` had no ``buffer`` attribute
    beforehand, the attribute is left set to ``None`` afterwards.

    Prints the captured line at index 1, the last line, and the number of
    captured lines (presumably index 0 is a header line -- TODO confirm).
    """
    buff = BytesIO()
    orig = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else None
    sys.stdout.buffer = buff
    try:
        main(cmds)
    finally:
        # Restore unconditionally so a failing main() cannot leave the
        # process writing into the capture buffer.
        sys.stdout.buffer = orig
    lines = buff.getvalue().rstrip().split(b'\n')

    # Print the line at index 1, the last line and the line count.
    print(lines[1].decode('utf-8'))
    print(lines[-1].decode('utf-8'))
    print('Total: ' + str(len(lines)))
Beispiel #6
0
def cli_lines(cmds):
    """Capture what ``main(cmds)`` writes to ``sys.stdout`` and summarise it.

    While ``main`` runs, ``sys.stdout`` is an in-memory ``BytesIO``.  The
    real stream is put back afterwards, including when ``main`` raises.
    NOTE(review): this assumes ``main`` writes bytes-compatible data to
    ``sys.stdout`` (true for Python 2 ``str``) -- confirm for Python 3.

    Prints the captured line at index 1, the last line, and the number of
    captured lines (presumably index 0 is a header line -- TODO confirm).
    """
    buff = BytesIO()
    orig = sys.stdout
    sys.stdout = buff
    try:
        main(cmds)
    finally:
        # Restore unconditionally; otherwise an exception in main() would
        # leave every later print going into the capture buffer.
        sys.stdout = orig

    # BytesIO.getvalue() returns bytes, so split on a bytes separator
    # (a str separator raises TypeError on Python 3).
    lines = buff.getvalue().rstrip().split(b'\n')

    # Print the line at index 1, the last line and the line count.
    # Decode so Python 3 prints text rather than a bytes repr.
    print(lines[1].decode('utf-8'))
    print(lines[-1].decode('utf-8'))
    print('Total: ' + str(len(lines)))
    def index_cdx(self, output_cdx, input_):
        """
        Output sorted, post-query resolving cdx from 'input_' warc(s)
        to 'output_cdx'. Write cdx to temp and rename to output_cdx
        when completed to ensure atomic updates of the cdx.

        The temp file is named ``output_cdx + '.tmp.' + timestamp20()``,
        i.e. it sits next to the final file.

        Returns True when the indexer finished and the temp file was moved
        into place.  Returns False when the indexer raised: the traceback
        is printed and the temp file, if one was created, is removed.
        """
        import errno
        import traceback

        # Run cdx indexer
        temp_cdx = output_cdx + '.tmp.' + timestamp20()
        indexer_args = ['-s', '-p', temp_cdx, input_]

        try:
            cdxindexer.main(indexer_args)
        except Exception:
            # format_exc() formats the exception currently being handled;
            # its first positional parameter is a frame limit, so the
            # exception object must not be passed to it.
            print(traceback.format_exc())

            try:
                os.remove(temp_cdx)
            except OSError as exc:
                # The indexer may have failed before creating the temp
                # file; a missing file must not mask the False result.
                if exc.errno != errno.ENOENT:
                    raise
            return False
        else:
            shutil.move(temp_cdx, output_cdx)
            return True
Beispiel #8
0
    def index_cdx(self, output_cdx, input_):
        """
        Output sorted, post-query resolving cdx from 'input_' warc(s)
        to 'output_cdx'. Write cdx to temp and rename to output_cdx
        when completed to ensure atomic updates of the cdx.

        The temp file is named ``output_cdx + '.tmp.' + timestamp20()``,
        i.e. it sits next to the final file.

        Returns True when the indexer finished and the temp file was
        renamed into place.  Returns False when the indexer raised: the
        traceback is printed and the temp file, if one was created, is
        removed.
        """
        import errno
        import traceback

        # Run cdx indexer
        temp_cdx = output_cdx + '.tmp.' + timestamp20()
        indexer_args = ['-s', '-p', temp_cdx, input_]

        try:
            cdxindexer.main(indexer_args)
        except Exception:
            # format_exc() formats the exception currently being handled;
            # its first positional parameter is a frame limit, so the
            # exception object must not be passed to it.
            print(traceback.format_exc())

            try:
                os.remove(temp_cdx)
            except OSError as exc:
                # The indexer may have failed before creating the temp
                # file; a missing file must not mask the False result.
                if exc.errno != errno.ENOENT:
                    raise
            return False
        else:
            os.rename(temp_cdx, output_cdx)
            return True