Ejemplo n.º 1
0
def process(start_dir,pattern,to_enc,to_eol_type,preview):
    """Report on, and optionally convert, the matching files under ``start_dir``.

    Every path returned by ``find_all_files(start_dir)`` that matches one of
    the comma-separated patterns is sorted into one of four groups:

    * files that cannot be decoded (``get_encoding`` raised ``DecodeException``),
    * files whose content cannot be encoded to ``to_enc``,
    * files for which ``get_todo`` reports nothing to change (skipped),
    * files to convert.

    Each non-empty group is printed. Unless ``preview`` is true, every file in
    the last group is then rewritten through ``conv_encoding`` and the number
    of converted files is printed.

    Args:
        start_dir: Directory to scan; a message is printed and nothing else
            happens when the path does not exist.
        pattern: Comma-separated patterns handed to
            ``is_match_patterns_fnmatch``.
        to_enc: Target encoding name, or ``'skip'`` to keep each file's
            detected encoding and change only the end of line.
        to_eol_type: Target end-of-line type (a key of ``tbl_eol``).
        preview: When true, print the report but convert nothing.

    Returns:
        None. All results are reported on standard output.
    """
    if not os.path.exists(start_dir):
        # The message is a gettext msgid; its text is kept as-is so existing
        # translation catalogs keep matching.
        print(_("%s: does'nt exists") % start_dir)
        return

    # Split once: the pattern list is the same for every file.
    patterns = pattern.split(',')

    undecodable = []    # paths that can't be decoded
    unencodable = []    # infos that can't be encoded to to_enc
    skipped = []        # infos with nothing to change
    pending = []        # (info, todo) pairs still to be converted

    # Gather information about each file: current encoding and end of line.
    for path in find_all_files(start_dir):
        if not is_match_patterns_fnmatch(path, patterns):
            continue
        try:
            encoding,data = get_encoding(path)
        except DecodeException:
            undecodable.append(path)
            continue

        info = {'path':path,'encoding':encoding,'eol_type':inv_eol[get_eol(data)]}
        todo = get_todo(info, to_enc, to_eol_type)
        if not todo:
            skipped.append(info)
            continue

        if 'encoding' in todo:
            # Dry-run the encoding; whatever is_encode_ok wrote into the
            # buffer is kept as the error text shown in the report.
            buf_err = io.StringIO()
            if not is_encode_ok(data, to_enc, buf_err):
                info['err_str'] = buf_err.getvalue()
                unencodable.append(info)
                continue

        # Keep todo next to info so it is not recomputed for the report and
        # the conversion (info is not modified in between).
        pending.append((info, todo))

    # Files that can't be decoded.
    if undecodable:
        print(_("Can't decode these files.They are not processed:"))
        for path in undecodable:
            print(path)
        print("---")

    # Files that can't be encoded.
    if unencodable:
        print(_("Can't encode these files.They are not processed:"))
        rows = [[info['encoding'],info['eol_type'],info['path'],info['err_str']]
                for info in unencodable]
        print_arr(rows, "[%s,%s] %s:%s")
        print("---")

    # Files to be skipped.
    if skipped:
        print(_("files to skip:"))
        rows = [[info['encoding'],info['eol_type'],info['path']] for info in skipped]
        print_arr(rows,"[%s,%s] %s")
        print("---")

    # Files to be converted.
    if not pending:
        print(_("nothing to do."))
        return

    print(_("files to convert:"))
    rows = []
    for info, todo in pending:
        # Show the target value only for the aspects that will really change.
        shown_enc = to_enc if 'encoding' in todo else info['encoding']
        shown_eol_type = to_eol_type if 'eol' in todo else info['eol_type']
        rows.append([info["encoding"],info['eol_type'],shown_enc,shown_eol_type,info["path"]])
    print_arr(rows,"[%s,%s]->[%s,%s] %s")
    print("---")

    # Stop here in preview mode.
    if preview:
        print(_("***preview mode***"))
        return

    # Convert. Every entry in pending has a non-empty todo, so no further
    # filtering is needed here.
    count = 0
    for info, todo in pending:
        eol = tbl_eol[to_eol_type] if 'eol' in todo else None
        # With 'skip', write back in the original encoding so that only the
        # end of line changes.
        target_enc = info['encoding'] if to_enc == 'skip' else to_enc
        conv_encoding(info["path"], target_enc, eol)
        count += 1

    print(count,_("files changed"))
Ejemplo n.º 2
0
 def test_encoding(self):
     """Converting a cp932 file with CRLF line ends yields UTF-8 text with LF line ends.

     NOTE(review): create_file is expected to write the fixture to
     test/test.txt (the path converted and read back below) -- confirm
     against its definition.
     """
     expected = 'あああ\n\nいいい\n'
     create_file('あああ\r\n\r\nいいい\r\n','cp932')
     conv_encoding('test/test.txt', 'utf-8', '\n')
     # newline='' disables newline translation, so the assertion sees the
     # exact line terminators that were written to disk.
     with open("test/test.txt","r",encoding="utf-8",newline='') as converted_file:
         converted = converted_file.read()
     self.assertEqual(converted,expected)