예제 #1
0
파일: data_models.py 프로젝트: rcarpa/rucio
    def dump(cls, rse, date='latest', filter_=None):
        '''
        Download the dump of `rse` for `date`, open it and pass the opened
        object to `cls.each`.

        :param rse: forwarded to `cls.download` and `cls.each`.
        :param date: forwarded to `cls.download` and `cls.each`
                     (defaults to 'latest').
        :param filter_: forwarded unchanged to `cls.each` (defaults to None).
        :returns: whatever `cls.each` returns.

        The opened object is not closed here; it is handed over to
        `cls.each` as is.
        '''
        # TODO: should check errors, content size at least, before opening
        # the downloaded file.
        opened_dump = smart_open(cls.download(rse, date))
        return cls.each(opened_dump, rse, date, filter_)
예제 #2
0
def test_smart_open_for_bz2_file():
    '''
    Write a small bzip2-compressed temporary file and check that
    `dumper.smart_open` does not return a `bz2file.BZ2File` instance for it.
    '''
    comp = bz2file.BZ2Compressor()
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(comp.compress('abcdef'.encode()))
            f.write(comp.flush())

        opened = dumper.smart_open(path)
        try:
            assert not isinstance(opened, bz2file.BZ2File)
        finally:
            # Release the handle before removing the file; guarded because
            # the returned object is only known through this test.
            if hasattr(opened, 'close'):
                opened.close()
    finally:
        # Always remove the temporary file, even when the assertion fails.
        os.unlink(path)
예제 #3
0
def parse_and_filter_file(filepath,
                          parser=lambda s: s,
                          filter_=lambda s: s,
                          prefix=None,
                          postfix='parsed',
                          cache_dir=DUMPS_CACHE_DIR):
    r'''
    Opens `filepath` as a read-only file, and for each line of the file
    for which the `filter_` function returns a true value, it writes a
    version parsed with the `parser` function.

    The name of the output file is generated by appending '_' + `postfix`
    to the base name of `filepath`. If `prefix` is given it is used instead
    of the base name of `filepath`.

    The output file (and temporary files while processing) are stored in
    `cache_dir`. If the output file already exists, nothing is processed
    and its path is returned immediately.

    Default values for the arguments:
        - `parser`: returns the same string.
        - `filter_`: returns its argument, so every non-empty line is kept.
        - `prefix`: None (the name of the input file is used as prefix).
        - `postfix`: 'parsed'.
        - `cache_dir`: DUMPS_CACHE_DIR.

    The output is written through `dumper.temp_file`, which is expected to
    create the file with a random name and rename it atomically to its
    final name when it is complete.

    '\n' is appended to each line, therefore if the input is 'a\nb\n' and
    `parser` is not specified the output will be 'a\n\nb\n\n'.

    Returns the path of the output file.
    '''

    prefix = os.path.basename(filepath) if prefix is None else prefix
    output_name = '_'.join((prefix, postfix))
    output_path = os.path.join(cache_dir, output_name)

    # A previous run already produced this output: reuse it.
    if os.path.exists(output_path):
        return output_path

    with dumper.temp_file(cache_dir, final_name=output_name) as (output, _):
        input_ = dumper.smart_open(filepath)
        try:
            for line in input_:
                if filter_(line):
                    output.write(parser(line) + '\n')
        finally:
            # Close the input even when `filter_`, `parser` or the write
            # raises; the guard keeps the original error if nothing was
            # opened.
            if input_ is not None:
                input_.close()

    return output_path
예제 #4
0
def test_smart_open_for_text_file():
    '''
    Check that `dumper.smart_open` returns a file-like object (one that has
    a `read` attribute) for the file created by `make_temp_file`.
    '''
    tmp = make_temp_file('/tmp', 'abcdef')
    try:
        opened = dumper.smart_open(tmp)
        try:
            # check if object is file - python2/3 compatibility
            assert hasattr(opened, 'read')
        finally:
            # Release the handle before the file is removed.
            if hasattr(opened, 'close'):
                opened.close()
    finally:
        # Always remove the temporary file, even when the assertion fails.
        os.unlink(tmp)
예제 #5
0
파일: test_dumper.py 프로젝트: zzaiin/Rucio
def test_smart_open_for_text_file():
    '''
    Check that `dumper.smart_open` returns a file-like object (one that has
    a `read` attribute) for the file created by `make_temp_file`.
    '''
    tmp = make_temp_file('/tmp', 'abcdef')
    try:
        opened = dumper.smart_open(tmp)
        try:
            # Duck-typed check: the `file` builtin and the `__builtin__`
            # module exist only on Python 2, so an isinstance() test against
            # them cannot run on Python 3.
            ok_(hasattr(opened, 'read'))
        finally:
            # Release the handle before the file is removed.
            if hasattr(opened, 'close'):
                opened.close()
    finally:
        # Always remove the temporary file, even when the check fails.
        os.unlink(tmp)