def test_exec(self):
    """Run the converter as a command line and verify the outcome.

    The command is launched through ``exec_command``; the test expects a
    zero return code, nothing on stderr, and ``test/cp932.txt`` to be
    reported as utf-8 by ``get_encoding`` afterwards.
    """
    command_line = "python conv_encoding.py --start_dir test --to_enc utf-8 --pattern *.txt"
    exit_code, captured_out, captured_err = exec_command(command_line)

    self.assertEqual(exit_code, 0)
    self.assertEqual(len(captured_err), 0)

    # Echo the tool's own report so it shows up in the test log.
    print(captured_out)

    detected = get_encoding("test/cp932.txt")
    self.assertEqual(detected[0], "utf-8")
def process(start_dir, file_pattern, from_str, to_str, preview):
    """Replace ``from_str`` with ``to_str`` in matching files under ``start_dir``.

    Candidate paths come from ``find_all_files`` and are filtered with
    ``is_match_patterns_fnmatch`` against the comma-separated
    ``file_pattern``. Each remaining file is read with ``get_encoding``;
    files that raise ``DecodeException`` are listed and left alone, files
    whose text contains ``from_str`` are listed and, unless ``preview`` is
    true, rewritten through ``replace_str``.

    Args:
        start_dir: Path where the search starts. If it does not exist a
            message is printed and nothing else happens.
        file_pattern: Comma-separated list of patterns.
        from_str: Text to search for.
        to_str: Text handed to ``replace_str`` as the replacement.
        preview: When true, only report the files that would be changed.

    Returns:
        None. Everything is reported on standard output.
    """
    if not os.path.exists(start_dir):
        # The text is passed to `_` (presumably a translation lookup), so the
        # message is kept verbatim, spelling included.
        print(_("%s: does'nt exists") % start_dir)
        return

    files = find_all_files(start_dir)

    # The pattern list is identical for every file, so split it only once.
    patterns = file_pattern.split(',')

    files_processed = []  # files to be processed
    files_dec_ng = []  # files that can't be decoded

    # Gather the files whose content contains from_str.
    for path in files:
        if not is_match_patterns_fnmatch(path, patterns):
            continue
        try:
            _encoding, data = get_encoding(path)
        except DecodeException:
            files_dec_ng.append(path)
            continue
        if from_str in data:
            files_processed.append(path)

    # Print files that can't be decoded.
    if files_dec_ng:
        print(_("Can't decode these files.They are not processed:"))
        for path in files_dec_ng:
            print(path)
        print("---")

    if not files_processed:
        print(_("nothing to do."))
        return

    # Print files to be converted.
    print(_("files to replace:"))
    for path in files_processed:
        print(path)
    print("---")

    # Return here if preview mode.
    if preview:
        print(_("***preview mode***"))
        return

    # Convert.
    for path in files_processed:
        replace_str(path, from_str, to_str)
    print(len(files_processed), _("files changed."))
def process(start_dir, pattern, to_enc, to_eol_type, preview):
    """Convert the encoding and/or end-of-line type of matching files.

    Candidate paths come from ``find_all_files`` and are filtered with
    ``is_match_patterns_fnmatch`` against the comma-separated ``pattern``.
    For every remaining file the current encoding and end-of-line type are
    determined, ``get_todo`` decides what has to change, and the file ends
    up in one of four reported groups: cannot be decoded, cannot be encoded
    to ``to_enc``, nothing to do (skipped), or to be converted. Unless
    ``preview`` is true the last group is then converted with
    ``conv_encoding``.

    Args:
        start_dir: Path where the search starts. If it does not exist a
            message is printed and nothing else happens.
        pattern: Comma-separated list of patterns.
        to_enc: Target encoding. The value ``'skip'`` makes the conversion
            step reuse each file's detected encoding.
        to_eol_type: Target end-of-line type; used as a key into ``tbl_eol``.
        preview: When true, only report what would be done.

    Returns:
        None. Everything is reported on standard output.
    """
    if not os.path.exists(start_dir):
        # The text is passed to `_` (presumably a translation lookup), so the
        # message is kept verbatim, spelling included.
        print(_("%s: does'nt exists") % start_dir)
        return

    files = find_all_files(start_dir)

    # The pattern list is identical for every file, so split it only once.
    patterns = pattern.split(',')

    files_processed = []  # (info, todo) pairs of files to be processed
    files_skipped = []  # files to be skipped
    files_dec_ng = []  # files that can't be decoded
    files_enc_ng = []  # files that can't be encoded

    # Gather information of files: current encoding, end of line.
    for path in files:
        if not is_match_patterns_fnmatch(path, patterns):
            continue
        try:
            encoding, data = get_encoding(path)
        except DecodeException:
            files_dec_ng.append(path)
            continue

        info = {'path': path, 'encoding': encoding, 'eol_type': inv_eol[get_eol(data)]}

        # NOTE(review): the to-do is computed once here and reused for the
        # report and the conversion; this assumes get_todo depends only on
        # its arguments - confirm.
        todo = get_todo(info, to_enc, to_eol_type)
        if not todo:
            files_skipped.append(info)
            continue

        if 'encoding' in todo:
            # Test the encoding before scheduling the file; whatever
            # is_encode_ok put into buf_err is kept for the report.
            buf_err = io.StringIO()
            if not is_encode_ok(data, to_enc, buf_err):
                info['err_str'] = buf_err.getvalue()
                files_enc_ng.append(info)
                continue

        files_processed.append((info, todo))

    # Print files that can't be decoded.
    if files_dec_ng:
        print(_("Can't decode these files.They are not processed:"))
        for path in files_dec_ng:
            print(path)
        print("---")

    # Print files that can't be encoded.
    if files_enc_ng:
        print(_("Can't encode these files.They are not processed:"))
        rows = [
            [info['encoding'], info['eol_type'], info['path'], info['err_str']]
            for info in files_enc_ng
        ]
        print_arr(rows, "[%s,%s] %s:%s")
        print("---")

    # Print files to be skipped.
    if files_skipped:
        print(_("files to skip:"))
        rows = [
            [info['encoding'], info['eol_type'], info['path']]
            for info in files_skipped
        ]
        print_arr(rows, "[%s,%s] %s")
        print("---")

    if not files_processed:
        print(_("nothing to do."))
        return

    # Print files to be converted, showing current -> resulting settings.
    print(_("files to convert:"))
    rows = []
    for info, todo in files_processed:
        msg_to_enc = to_enc if 'encoding' in todo else info['encoding']
        msg_to_eol_type = to_eol_type if 'eol' in todo else info['eol_type']
        rows.append(
            [info["encoding"], info['eol_type'], msg_to_enc, msg_to_eol_type, info["path"]]
        )
    print_arr(rows, "[%s,%s]->[%s,%s] %s")
    print("---")

    # Return here if preview mode.
    if preview:
        print(_("***preview mode***"))
        return

    # Convert. Every entry has a non-empty to-do; empty ones were skipped above.
    for info, todo in files_processed:
        eol = tbl_eol[to_eol_type] if 'eol' in todo else None
        # With 'skip' the original encoding is specified, so only the end of
        # line changes.
        target_enc = info['encoding'] if to_enc == 'skip' else to_enc
        conv_encoding(info["path"], target_enc, eol)
    print(len(files_processed), _("files changed"))
def test_call(self):
    """Call ``process`` directly and verify the converted file.

    After the call, ``test/cp932.txt`` is expected to be reported as utf-8
    by ``get_encoding`` and its text to yield ``'\\r\\n'`` from ``get_eol``.
    """
    process("test", "*.txt", "utf-8", 'CRLF', False)

    detected_encoding, content = get_encoding("test/cp932.txt")
    self.assertEqual(detected_encoding, "utf-8")

    line_ending = get_eol(content)
    self.assertEqual(line_ending, '\r\n')