def test_from_filenames():
    with filetexts({'a1.log': 'A\nB', 'a2.log': 'C\nD'}) as fns:
        assert set(line.strip() for line in db.from_filenames(fns)) == \
            set('ABCD')
        assert set(line.strip() for line in db.from_filenames('a*.log')) == \
            set('ABCD')

        assert raises(ValueError,
                      lambda: db.from_filenames('non-existent-*-path'))

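# These tests rely on small helpers (filetexts, tmpfile, raises) that are not
# shown in these snippets. A minimal sketch of equivalent fixtures is given
# here for context; the exact behaviour is an assumption, not the library's
# actual implementation.
import os
import tempfile
from contextlib import contextmanager


@contextmanager
def filetexts(d):
    # Write each {filename: text} pair to disk, yield the filenames, and
    # remove the files afterwards.
    for fn, text in d.items():
        with open(fn, 'w') as f:
            f.write(text)
    try:
        yield list(d)
    finally:
        for fn in d:
            if os.path.exists(fn):
                os.remove(fn)


@contextmanager
def tmpfile(extension=''):
    # Yield a temporary filename (optionally with an extension) and delete
    # the file afterwards if the test created it.
    suffix = '.' + extension.lstrip('.') if extension else ''
    fn = tempfile.mktemp(suffix=suffix)
    try:
        yield fn
    finally:
        if os.path.exists(fn):
            os.remove(fn)


def raises(exc, func):
    # Return True if calling func() raises exc.
    try:
        func()
        return False
    except exc:
        return True
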
def test_from_filenames_large_gzip():
    with tmpfile('gz') as fn:
        f = gzip.open(fn, 'wb')
        f.write(b'Hello, world!\n' * 100)
        f.close()

        b = db.from_filenames(fn, chunkbytes=100)
        c = db.from_filenames(fn)
        assert len(b.dask) > 5
        assert list(b) == list(c)

def test_from_filenames_large_gzip():
    with tmpfile('gz') as fn:
        f = gzip.open(fn, 'wb')
        f.write(b'Hello, world!\n' * 100)
        f.close()

        b = db.from_filenames(fn, chunkbytes=100)
        c = db.from_filenames(fn)
        assert len(b.dask) > 5
        assert list(b) == [s.decode() for s in c]

def test_from_filenames_large_gzip():
    with tmpfile('gz') as fn:
        f = GzipFile(fn, 'wb')
        f.write(b'Hello, world!\n' * 100)
        f.close()

        b = db.from_filenames(fn, chunkbytes=100, linesep='\n')
        c = db.from_filenames(fn, linesep='\n')
        assert len(b.dask) > 5
        assert list(b) == list(c)

def test_from_filenames_large():
    with tmpfile() as fn:
        with open(fn, 'w') as f:
            f.write('Hello, world!\n' * 100)
        b = db.from_filenames(fn, chunkbytes=100)
        c = db.from_filenames(fn)
        assert len(b.dask) > 5
        assert list(b) == list(c)

        d = db.from_filenames([fn], chunkbytes=100)
        assert list(b) == list(d)

def test_from_filenames_large():
    with tmpfile() as fn:
        with open(fn, 'wb') as f:
            f.write(('Hello, world!' + os.linesep).encode() * 100)
        b = db.from_filenames(fn, chunkbytes=100)
        c = db.from_filenames(fn)
        assert len(b.dask) > 5
        assert list(map(str, b)) == list(map(str, c))

        d = db.from_filenames([fn], chunkbytes=100)
        assert list(b) == list(d)

def test_from_filenames_encoding():
    with tmpfile() as fn:
        with open(fn, 'wb') as f:
            f.write((u'你好!' + os.linesep).encode('gb18030') * 100)
        b = db.from_filenames(fn, chunkbytes=100, encoding='gb18030')
        c = db.from_filenames(fn, encoding='gb18030')
        assert len(b.dask) > 5
        assert (list(map(lambda x: x.encode('utf-8'), b)) ==
                list(map(lambda x: x.encode('utf-8'), c)))

        d = db.from_filenames([fn], chunkbytes=100, encoding='gb18030')
        assert list(b) == list(d)

def test_gh715():
    bin_data = u'\u20ac'.encode('utf-8')
    with tmpfile() as fn:
        with open(fn, 'wb') as f:
            f.write(bin_data)
        a = db.from_filenames(fn)
        assert a.compute()[0] == bin_data.decode('utf-8')

def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])
    assert (set(b.dask.values()) ==
            set([(list, (bz2.BZ2File, os.path.abspath('foo.json.bz2'))),
                 (list, (bz2.BZ2File, os.path.abspath('bar.json.bz2')))]))

def test_from_filenames_gzip():
    b = db.from_filenames(['foo.json.gz', 'bar.json.gz'])
    assert (set(b.dask.values()) ==
            set([(list, (gzip.open, os.path.abspath('foo.json.gz'))),
                 (list, (gzip.open, os.path.abspath('bar.json.gz')))]))

def analyze(path, parse_timestamps=True, **kwargs):
    """
    Analyze a given directory of either .json or flat text files with
    delimited JSON to get relevant key statistics.

    Parameters
    ----------
    path: string
        Path to directory
    parse_timestamps: boolean, default True
        If True, will attempt to regex match ISO8601 formatted timestamps
    kwargs:
        Passed into json.loads. Here you can specify encoding, etc.
    """
    stats = {}
    start_time = time.time()

    file_list = [os.path.join(path, f) for f in os.listdir(path)]
    bag = db.from_filenames(file_list).map(json.loads)

    recur_partial = partial(recur_dict, parse_timestamps=parse_timestamps)
    stats = bag.fold(recur_partial, combine_stats, initial={}).compute()

    count = stats["total_records"]
    del stats["total_records"]

    elapsed = time.time() - start_time
    print('Malort run finished: {} JSON blobs analyzed in {} seconds.'
          .format(count, elapsed))

    return MalortResult(stats, count, elapsed)

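# A hypothetical usage sketch for analyze() above: it lists every file in the
# directory, loads each line as JSON into a dask bag, and folds per-key
# statistics. The directory path below is an illustrative assumption, not a
# real dataset.
result = analyze('/path/to/json_dir', parse_timestamps=True)
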
def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])
    assert (set(b.dask.values()) ==
            set([(list, (decode_sequence, system_encoding,
                         (bz2.BZ2File, os.path.abspath('foo.json.bz2'), 'rb'))),
                 (list, (decode_sequence, system_encoding,
                         (bz2.BZ2File, os.path.abspath('bar.json.bz2'), 'rb')))]))

def test_from_filenames_gzip():
    b = db.from_filenames(['foo.json.gz', 'bar.json.gz'])
    assert (set(b.dask.values()) ==
            set([(list, (decode_sequence, system_encoding,
                         (gzip.open, os.path.abspath('foo.json.gz'), 'rb'))),
                 (list, (decode_sequence, system_encoding,
                         (gzip.open, os.path.abspath('bar.json.gz'), 'rb')))]))

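# decode_sequence is not defined in these snippets. Judging from the graph
# tuples above, it is applied as (decode_sequence, system_encoding,
# <open-file task>), so a plausible stand-in is sketched below; this is an
# assumption about its behaviour, not the library's actual helper.
def decode_sequence(encoding, seq):
    # Lazily decode each bytes line coming out of the opened file object.
    for item in seq:
        yield item.decode(encoding)
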
def test_from_filenames_bz2():
    b = db.from_filenames(['foo.json.bz2', 'bar.json.bz2'])
    assert (set(b.dask.values()) ==
            set([(list, (io.TextIOWrapper,
                         (io.BufferedReader,
                          (open, os.path.abspath('foo.json.bz2'), 'rb', 'bz2')),
                         system_encoding, None, os.linesep)),
                 (list, (io.TextIOWrapper,
                         (io.BufferedReader,
                          (open, os.path.abspath('bar.json.bz2'), 'rb', 'bz2')),
                         system_encoding, None, os.linesep))]))

def test_from_filenames():
    with filetexts({'a1.log': 'A\nB', 'a2.log': 'C\nD'}) as fns:
        assert set(line.strip() for line in db.from_filenames(fns)) == \
            set('ABCD')
        assert set(line.strip() for line in db.from_filenames('a*.log')) == \
            set('ABCD')

def bag_to_iterator(x, **kwargs):
    return db.from_filenames([tf.path for tf in x])

def of_files(self, filenames, chunkbytes=None):
    return Dream(*bag.from_filenames(filenames, chunkbytes)._args)
