def dump(cls, rse, date='latest', filter_=None):
    '''
    Fetch the dump of `rse` for `date` and iterate over its entries.

    `cls.download(rse, date)` provides the name of a local file, which is
    opened with `smart_open` and handed to `cls.each` together with `rse`,
    `date` and `filter_`. The value returned by `cls.each` is returned
    unchanged.
    '''
    # Should check errors, content size at least
    dump_file = smart_open(cls.download(rse, date))
    return cls.each(dump_file, rse, date, filter_)
def test_smart_open_for_bz2_file():
    '''
    `smart_open` on a bzip2-compressed file must not return a raw
    `bz2file.BZ2File` instance.

    NOTE(review): presumably `smart_open` wraps compressed input in a
    text-mode reader - confirm against `dumper.smart_open`.
    '''
    fd, path = tempfile.mkstemp()
    try:
        comp = bz2file.BZ2Compressor()
        with os.fdopen(fd, 'wb') as f:
            f.write(comp.compress('abcdef'.encode()))
            f.write(comp.flush())

        opened = dumper.smart_open(path)
        try:
            assert not isinstance(opened, bz2file.BZ2File)
        finally:
            # Release the handle so the test does not leak a descriptor.
            if opened is not None:
                opened.close()
    finally:
        # Remove the temporary file even when the assertion fails.
        os.unlink(path)
def parse_and_filter_file(filepath, parser=lambda s: s, filter_=lambda s: s, prefix=None, postfix='parsed', cache_dir=DUMPS_CACHE_DIR):
    r'''
    Opens `filepath` as a read-only file, and for each line of the file
    for which the `filter_` function returns a true value, it writes a
    version parsed with the `parser` function.

    The name of the output file is generated appending '_' + `postfix` to
    the filename in `filepath`. If `prefix` is given it is used instead of
    the filename in `filepath`.

    The output file (and temporary files while processing) are stored in
    `cache_dir`. If the output file already exists nothing is processed
    and its path is returned directly.

    Default values for the arguments:
    - `parser`: returns the same string.
    - `filter_`: returns its argument (true for any non-empty line).
    - `prefix`: None (the name of the input file is used as prefix).
    - `postfix`: 'parsed'.
    - `cache_dir`: DUMPS_CACHE_DIR.

    The output file is created with a random name and renamed atomically
    when it is complete.

    '\n' is appended to each line, therefore if the input is 'a\nb\n' and
    `parser` is not specified the output will be 'a\n\nb\n\n'.

    Returns the path of the output file.
    '''
    prefix = os.path.basename(filepath) if prefix is None else prefix
    output_name = '_'.join((prefix, postfix))
    output_path = os.path.join(cache_dir, output_name)

    # A previous run already produced this output: reuse it.
    if os.path.exists(output_path):
        return output_path

    with dumper.temp_file(cache_dir, final_name=output_name) as (output, _):
        input_ = dumper.smart_open(filepath)
        try:
            for line in input_:
                if filter_(line):
                    output.write(parser(line) + '\n')
        finally:
            # Close the input even if `filter_`, `parser` or the write
            # raises. The None guard keeps the original error visible
            # should `smart_open` not have returned a file object.
            if input_ is not None:
                input_.close()

    return output_path
def test_smart_open_for_text_file():
    '''
    `smart_open` on a plain text file must return a file-like object,
    i.e. something exposing a `read` attribute.
    '''
    tmp = make_temp_file('/tmp', 'abcdef')
    try:
        opened = dumper.smart_open(tmp)
        try:
            # check if object is file - python2/3 compatibility
            assert hasattr(opened, 'read')
        finally:
            # Release the handle so the test does not leak a descriptor.
            if opened is not None:
                opened.close()
    finally:
        # Remove the temporary file even when the assertion fails.
        os.unlink(tmp)
def test_smart_open_for_text_file():
    '''
    `smart_open` on a plain text file must return a file-like object,
    i.e. something exposing a `read` attribute.
    '''
    tmp = make_temp_file('/tmp', 'abcdef')
    try:
        opened = dumper.smart_open(tmp)
        try:
            # Duck-typed check instead of isinstance(..., __builtin__.file):
            # the `file` type only exists on Python 2, so the isinstance
            # form cannot run on Python 3.
            ok_(hasattr(opened, 'read'))
        finally:
            # Release the handle so the test does not leak a descriptor.
            if opened is not None:
                opened.close()
    finally:
        # Remove the temporary file even when the assertion fails.
        os.unlink(tmp)