def read(file,encoding='',mod='r',return_filename=False,print_detect_encoding=True,**ka):
    """Read a text file and return its content.

    Parameters:
        file: path of the file to read (normalized via autoPath).
        encoding: text encoding; when empty it is auto-detected on Python 3.
        mod: open() mode, default 'r'.
        return_filename: when truthy, return (content, f.name) instead of
            just the content.
        print_detect_encoding: print the detection result when the encoding
            is auto-detected.
        **ka: alias keyword names (e.g. 'p', 'rf', 'return_file', ...)
            resolved through U.get_duplicated_kargs.

    Returns:
        str content, or (content, filename) when return_filename is truthy.
    """
    file = autoPath(file)
    if not encoding and print_detect_encoding:
        U = py.importU()
        _pde = U.get_duplicated_kargs(ka, 'print_encoding', 'p_encoding', 'p',
                                      'pde', 'pEncoding', 'p_decode')
        # NOTE: must compare with `is`, never `==` — the sentinel may compare
        # equal to ordinary values.
        if _pde is not U.GET_DUPLICATED_KARGS_DEFAULT:
            print_detect_encoding = _pde
    if not return_filename:
        U = py.importU()
        return_filename = U.get_duplicated_kargs(
            ka, 'returnFile', 'rf', 'rfn', 'return_file', 'return_name',)
    if py.is2():
        f = py.open(file, mod)
    else:  # Python 3: auto-detect the encoding when none was supplied.
        # High confidence threshold with utf-8 fallback — e.g. astropy's
        # builtin_frames/__init__.py detects as
        # {'confidence': 0.73, 'encoding': 'Windows-1252'} otherwise.
        encoding = encoding or detectEncoding(file, confidence=0.9,
                                              default='utf-8',
                                              p=print_detect_encoding)
        f = py.open(file, mod, encoding=encoding)
    try:
        s = f.read()
    finally:
        f.close()  # fixed: close the handle even when read() raises
    if return_filename:
        return s, f.name
    return s
def writeIterable(file,data,end='\n',overwrite=True,encoding=None):
    """Append each item of *data* to *file*, one item per line.

    Parameters:
        file: target path (normalized via autoPath).
        data: any iterable; each item is converted with py.str.
        end: terminator appended after each item (default newline).
        overwrite: truncate the file first via new() before appending.
        encoding: text encoding (Python 3 only); defaults to U.encoding.

    Returns:
        The name of the written file.
    """
    U = py.importU()
    if not encoding:
        encoding = U.encoding
    file = autoPath(file)
    if overwrite:
        new(file)  # truncate first, then append below
    if py.is2():
        f = py.open(file, 'a')
    else:
        f = py.open(file, 'a', encoding=encoding)
    # fixed: guarantee the handle is closed even when py.str()/write()
    # raises part-way through the iterable
    try:
        for i in data:
            f.write(py.str(i) + end)
    finally:
        f.close()
    return f.name
def read_json(file,encoding=None):
    """Load and return the JSON document stored in *file*.

    The path is normalized via autoPath and the encoding is auto-detected
    when not supplied.
    """
    import json
    path = autoPath(file)
    enc = encoding or detectEncoding(path)
    with py.open(path, encoding=enc) as fp:
        return json.load(fp)
def dill_dump_bytes(obj,file=None,protocol=None,dill_ext='.dill'):
    '''Serialize *obj* with dill.

    With *file*: write to that path (extension auto-completed with
    *dill_ext*) and return the file name. Without: return the pickled bytes.

    #TODO file=0 Not write '../0.dill'
    dill.dump(obj, file, protocol=None, byref=None, fmode=None, recurse=None)
    dill.dumps(obj, protocol=None, byref=None, fmode=None, recurse=None)
    ValueError: pickle protocol must be <= 4
    r=request.get ...
    F.readableSize(len(F.dill_dump(protocol=None,obj=r) ) )#'14.192 KiB'
    F.readableSize(len(F.dill_dump(protocol=0,obj=r) ) ) #'15.773 KiB'
    F.readableSize(len(F.dill_dump(protocol=1,obj=r) ) ) #'19.177 KiB'
    F.readableSize(len(F.dill_dump(protocol=2,obj=r) ) ) #'18.972 KiB'
    F.readableSize(len(F.dill_dump(protocol=3,obj=r) ) ) #'14.192 KiB'
    F.readableSize(len(F.dill_dump(protocol=4,obj=r) ) ) #'13.694 KiB'
    '''
    import dill
    if file:
        # Heuristic argument swap: if the caller passed (path, obj) instead
        # of (obj, path) — obj looks like a short '.dill' path while file
        # does not — swap them. NOTE(review): presumes short strings
        # containing '.dill' are paths; confirm against callers.
        if py.istr(obj) and py.len(obj)<333 and '.dill' in obj:
            if not py.istr(file) or '.dill' not in file:
                file,obj=obj,file
        file=auto_path(file,ext=dill_ext)
        with py.open(file,'wb') as f:
            dill.dump(obj=obj,file=f,protocol=protocol)
        return file
    else:
        return dill.dumps(obj=obj,protocol=protocol)
def writeYaml(file,obj):
    """Dump *obj* to *file* as block-style YAML.

    Returns the file path on success, or a py.No wrapping the exception,
    the path, and the object on failure.
    """
    import yaml
    try:
        with py.open(file, 'w') as fh:
            # default_flow_style=False forces block style for nested
            # collections instead of inline flow style.
            yaml.dump(obj, fh, default_flow_style=False)
        return file
    except Exception as err:
        return py.No(err, file, obj)
def lineCount(a):
    """Count newline characters in file *a* by streaming fixed-size chunks."""
    def _chunks(fh, chunk_size=65536):
        # Yield successive chunks until EOF; keeps memory flat for big files.
        while True:
            piece = fh.read(chunk_size)
            if not piece:
                return
            yield piece
    with py.open(a, "r") as fh:
        return sum(chunk.count("\n") for chunk in _chunks(fh))
def dill_load_file(file,dill_ext='.dill'):
    """Load and return the object pickled in a dill file.

    The path is auto-completed with *dill_ext*. On any failure a py.No
    wrapping (path, exception) is returned instead of raising.
    """
    import dill
    # 'ignore' may be missing from dill.settings (KeyError) — set explicitly.
    dill.settings['ignore'] = False
    path = auto_path(file, ext=dill_ext)
    try:
        with py.open(path, 'rb') as fh:
            return dill.load(fh)
    except Exception as err:  # TODO: unify all load/save error paths on py.No
        return py.No(path, err)
def new(a):
    """Create *a* as an empty file — will overwrite existing content.

    Returns:
        The resolved file name on success; False on failure (the error is
        recorded via setErr).
    """
    try:
        # fixed: context manager guarantees the handle is closed even when
        # write() raises.
        with py.open(a, 'w') as f:
            f.write('')
        return f.name
    except Exception as e:
        setErr(e)
        return False
def include(file,keyword):
    """Return True when *keyword* occurs in any line of *file*.

    Opens in binary mode when *keyword* is bytes, text mode otherwise.
    Returns py.No wrapping the exception on I/O failure, False when the
    keyword never appears.
    """
    mode = 'rb' if py.isbyte(keyword) else 'r'
    try:
        with py.open(file, mode) as fh:
            for line in fh:
                if keyword in line:
                    return True
    except Exception as err:
        return py.No(err)
    return False
def serialize(obj,file=None,protocol=0):
    '''Pickle *obj*.

    With *file*: dump to that path (normalized via autoPath) and return
    the path. Without: return the pickled representation of the object
    as a bytes object.
    '''
    import pickle
    if not file:
        return pickle.dumps(obj=obj, protocol=protocol)
    target = autoPath(file)
    with py.open(target, 'wb') as fh:
        pickle.dump(obj=obj, file=fh, protocol=protocol)
    return target
def read_bytes(file,size=-1,):
    '''Return up to *size* bytes from *file* (-1 reads everything).

    *file* may be an io.BytesIO (rewound and read in full, ignoring *size*)
    or a path. On failure returns py.No wrapping the exception and path.

    f.read(size=-1, /) Read and return up to n bytes.
    '''
    import io
    if isinstance(file, io.BytesIO):
        file.seek(0)
        return file.read(-1)
    path = autoPath(file)
    try:
        with py.open(path, 'rb') as fh:
            return fh.read(size)
    except Exception as err:
        return py.No(err, path)
def detect_file_encoding(file,confidence=0.7,default=py.No('not have default encoding'),buffer_size=9999,p=True,**ka):
    """Detect the text encoding of *file* (a path or an open binary file).

    Reads up to *buffer_size* bytes and delegates detection to T.detect;
    returns the detected encoding, falling back to *default* below the
    *confidence* threshold. When *p* is truthy, prints the file and the
    detection result. Alias keyword names for *p* are accepted via **ka.
    """
    U,T,N,F=py.importUTNF()
    # Resolve alias keyword names for the print flag without consuming them.
    p=U.get_duplicated_kargs(ka,'print_file_encoding','print_detect_encoding','print',default=p,no_pop=True)
    if py.istr(file):
        with py.open(file,'rb') as f:
            b=f.read(buffer_size)
    elif py.isfile(file):
        # An already-open file must be in binary mode to sniff raw bytes.
        if 'b' not in file.mode:raise py.ArgumentError("'b' not in file.mode",file)
        i=file.tell()
        b=file.read(buffer_size)
        file.seek(i)  # restore the caller's read position
    else:raise py.ArgumentError('need str or file')
    c= T.detect(b,confidence=confidence,default=default)
    if p:print(file,c)
    #TODO U.get_or_set('detect_file_encoding.p',True)
    return c
def write(file,data,mod='w',encoding='utf-8',mkdir=False,autoArgs=True,pretty=True,seek=None):
    '''Write *data* (bytes, text, or any object) to *file*; return f.name.

    py3 open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
    py2 open(name[, mode[, buffering]])
    pretty=True Format a Python object into a pretty-printed representation.

    The mode is forced to binary; text is encoded with *encoding* before
    writing. Non-string objects are pretty-printed via U.pformat when
    *pretty* is truthy. autoArgs enables a best-effort (file, data)
    argument-swap heuristic; mkdir creates parent directories first.
    '''
    U=py.importU()
    try:
        if autoArgs:
            # Heuristic: the caller may have swapped (file, data) — if data
            # looks like a file name and file does not, swap them back.
            if py.istr(data) and py.len(file)>py.len(data)>0:
                if '.' in data and '.' not in file and isFileName(data):
                    file,data=data,file
                    U.warring('F.write fn,data but seems data,fn auto corrected(v 纠正')
    except:pass  # best-effort auto-correction must never break the write
    file=autoPath(file)
    if not encoding:encoding=U.encoding
    if mkdir:makeDirs(file,isFile=True)
    # if 'b' not in mod and py.isbytes(data):mod+='b'  # (old: auto-detect whether data matches mod)
    if 'b' not in mod: # force writing as bytes
        mod+='b'
    f=py.open(file,mod)
    # f.write(forced unicode) originally only applied to py.is3(), but py2
    # also has `from io import open`.
    if py.isint(seek):
        f.seek(seek)
    # with open(file,mod) as f:
    if py.isbyte(data):# istr(data) or (py.is3() and py.isinstance(data,py.bytes))
        f.write(data)
    elif (py.is2() and py.isinstance(data,py.unicode)) :
        f.write(data.encode(encoding))
    elif (py.is3() and py.istr(data)):
        # if 'b' in mod.lower():
        f.write(data.encode(encoding))
        # else:f.write(data)  # *** UnicodeEncodeError: 'gbk' codec can't encode character '\xa9'
    else:
        # if py.is2():print >>f,data
        # else:
        if pretty:
            data=U.pformat(data)  # pretty-print arbitrary objects before writing
        U.pln(data,file=f)
    f.close()
    return f.name
def deSerialize(obj=None,file=None):
    '''Unpickle an object from bytes or from a file path.

    The protocol version of the pickle is detected automatically, so no
    protocol argument is needed. Bytes past the pickled object's
    representation are ignored.

    Raises:
        py.ArgumentError: when neither bytes nor a file path is supplied.
    '''
    if not py.isbyte(obj) and not file:raise py.ArgumentError('need bytes or file=str ')
    import pickle
    if py.istr(obj):
        # The caller passed a path in the first positional slot.
        file=obj
        obj=None
        U=py.importU()  # fixed: U was referenced without being defined (NameError)
        U.log('autoArgs file=%s'%file)
    if py.isbyte(obj):
        return pickle.loads(obj)
    else:
        file=autoPath(file)
        with py.open(file,'rb') as f:
            return pickle.load(f)
def readlines(a,EOL=True,encoding=None,str_repr=False):
    """Return the lines of file *a*.

    Parameters:
        a: path of the file (normalized via autoPath).
        EOL: True keeps line endings; False strips them via splitlines().
        encoding: text encoding; auto-detected when not supplied.
        str_repr: wrap each line in U.StrRepr before returning.

    Returns:
        A list of lines, or py.No wrapping the exception on failure.
    """
    a=autoPath(a)
    if not encoding:
        encoding=detectEncoding(a)
    try:
        if EOL:
            # fixed: close the handle deterministically instead of leaking
            # the iterator's open file
            with py.open(a,encoding=encoding) as f:
                r=[i for i in f]
        else:
            r=read(a,encoding=encoding).splitlines()
        if str_repr:
            U=py.importU()
            return [U.StrRepr(i) for i in r]
        return r
    except Exception as e:
        return py.No(e)
def read_multi_files_return_bytes_list(*fs,max_size=8*1024*1024,return_all_bytes=False):
    """Read several files and return their contents as a list, in order.

    For each path in *fs*: a missing/empty file yields its (falsy) size
    result — or b'' when *return_all_bytes* is set; a file larger than
    *max_size* yields a py.No; otherwise up to *max_size* bytes are read
    and appended.
    """
    r=[]
    def append(a):
        # Normalize falsy results to b'' when the caller wants bytes only.
        if return_all_bytes and not a:
            return r.append(b'')
        r.append(a)
    for f in fs:
        s=size_single_file(f)
        if not s :
            append(s)
            continue
        if s>max_size:
            append(py.No('f > max_size',f,max_size))
            continue
        with py.open(f,'rb') as fp:
            b=fp.read(max_size)
            append(b)
    return r
def open(file,mode='r',**ka):
    '''Return an open file handle for *file*.

    Accepts either an already-open file object (returned unchanged) or a
    path string (opened via the builtin open with *mode* and **ka). The
    mode may also be supplied through the alias keywords 'mode'/'mod'/'m'.

    py.open( file, mode='r', buffering=-1, encoding=None, errors=None,
    newline=None, closefd=True, opener=None, )
    '''
    U,T,N,F=py.importUTNF()
    mode=U.get_duplicated_kargs(ka,'mode','mod','m',default=mode)
    if py.isfile(file):
        # Already a file object — hand it back untouched.
        return file
    if py.istr(file):
        return py.open(file,mode=mode,**ka)
    raise py.ArgumentUnsupported(file,ka)
def csvAsList(fn):
    """Read CSV file *fn* and return all rows as a list of row lists.

    Returns:
        py.list of rows, each row a list of field strings.
    """
    import csv
    if py.is2():
        # Python 2's csv module requires binary mode.
        with py.open(fn, 'rb') as f:
            return py.list(csv.reader(f))
    # fixed: Python 3's csv.reader requires a TEXT-mode file — 'rb' made
    # every row iteration raise. newline='' per the csv module docs.
    with py.open(fn, 'r', newline='') as f:
        return py.list(csv.reader(f))
def write_json(file,obj):
    """Serialize *obj* as JSON to *file* (extension auto-completed to json).

    Returns the resolved file path.
    """
    import json
    target = auto_path(file, ext='json')
    # json.dump emits str, so the file must be opened in text mode.
    with py.open(target, 'w') as fp:
        json.dump(obj=obj, fp=fp)
    return target
def readYaml(file):
    """Parse *file* as YAML (via safe_load) and return the resulting object."""
    import yaml
    with py.open(file) as stream:
        data = yaml.safe_load(stream)
    return data