def test_translate_str():
    """Check ``translate_str`` detection results on three raw byte strings.

    The first two inputs must be reported with encoding ``"utf-8"``; the
    third must be reported with language ``"Japanese"``.
    """
    # A four-byte UTF-8 sequence followed by a two-byte one.
    multibyte_sample = b"\xf1\x90\x80\x80\xc2\x90"
    assert translate_str(multibyte_sample)['encoding'] == "utf-8"

    # UTF-8 bytes of accented French text.
    french_sample = b"fran\xc3\xa7ais \xc3\xa9l\xc3\xa8ve"
    assert translate_str(french_sample)['encoding'] == "utf-8"

    # NOTE(review): these bytes look like Shift-JIS text - confirm. The test
    # itself only checks the detected language.
    japanese_sample = (
        b'\x83G\x83\x93\x83R\x81[\x83f\x83B\x83\x93\x83O\x82'
        b'\xcd\x93\xef\x82\xb5\x82\xad\x82\xc8\x82\xa2'
    )
    assert translate_str(japanese_sample)['language'] == "Japanese"
def test_translate_str():
    """Check ``str_utils.translate_str`` on encoded bytes and on a plain ``str``.

    Scenario 1: Cyrillic text encoded as ISO-8859-5 must come back with
    language ``'Bulgarian'`` and encoding ``'ISO-8859-5'``.
    Scenario 2: an already-decoded string must come back with language
    ``'unknown'`` and encoding ``'utf-8'``.
    """
    # Scenario 1: bytes in a legacy single-byte Cyrillic encoding.
    cyrillic_text = 'Стамболийски'
    iso_bytes = cyrillic_text.encode('ISO-8859-5')
    bytes_detection = str_utils.translate_str(iso_bytes)
    assert bytes_detection['language'] == 'Bulgarian'
    assert bytes_detection['encoding'] == 'ISO-8859-5'

    # Scenario 2: input that is already a str.
    str_detection = str_utils.translate_str('abcdéfg')
    assert str_detection['language'] == 'unknown'
    assert str_detection['encoding'] == 'utf-8'
def parse_comp_obj(self, field, res):
    """Copy the ``user_type`` and ``prog_id`` sub-fields of ``field`` into ``res.body``.

    ``user_type`` is re-encoded with the field's own charset and run through
    ``translate_str``; the converted text and the guessed encoding are stored
    under ``'user_type'`` and ``'user_type_encoding'``. ``prog_id`` is stored
    as-is under ``'prog_id'``. The two sections are independent: a
    ``MissingField`` raised in one is ignored and does not prevent the other
    from running.

    Args:
        field: Container indexed by sub-field name; the sub-fields read here
            expose ``.value`` (and ``.charset`` for ``user_type``).
        res: Result object whose ``body`` mapping is populated.
    """
    # Section 1: common fields plus the user type. Everything stays inside a
    # single try so a MissingField anywhere aborts the rest of the section.
    try:
        self.cache_fields(field, res)
        type_field = field["user_type"]
        raw_user_type = type_field.value.encode(type_field.charset)
        detection = translate_str(raw_user_type)
        res.body['user_type'] = detection['converted']
        res.body['user_type_encoding'] = detection['encoding']
    except MissingField:
        pass

    # Section 2: the program identifier, when present.
    try:
        res.body['prog_id'] = field['prog_id'].value
    except MissingField:
        pass
def jar_extract(self, filename, dest_dir):
    """Extract every file entry of the ZIP/JAR archive ``filename`` into ``dest_dir``.

    Entry names are passed through ``translate_str``. The content is written
    as ``unknown_charset_filename_<n>`` directly under ``dest_dir`` when:

    * ``translate_str`` reports an ``'unknown'`` encoding,
    * the decoded name would resolve outside ``dest_dir`` (path traversal
      via ``..`` components or absolute paths), or
    * the output file cannot be opened under the decoded name.

    Directory entries (names ending in ``/`` or a backslash) are skipped;
    parent directories of file entries are created on demand.

    Args:
        filename: Path of the archive to open.
        dest_dir: Directory that receives the extracted files.

    Returns:
        True when every entry was processed. False when the archive could
        not be opened or read, or when an entry failed to extract; the
        failure is logged through ``self.log``.

    Raises:
        NotJARException: logged and re-raised if it surfaces while the
            archive is being processed.
    """

    def _inside_dest(root, candidate):
        # True when ``candidate`` resolves to ``root`` or to a path below it.
        try:
            return os.path.commonpath([root, os.path.realpath(candidate)]) == root
        except ValueError:
            # Different drives, or a name that cannot be resolved at all
            # (e.g. embedded NUL): treat it as unsafe.
            return False

    unknown_charset_counter = 0

    try:
        # The context manager guarantees the archive handle is released on
        # every exit path, including the early ``return False`` below.
        with zipfile.ZipFile(filename, "r") as zf:
            dest_root = os.path.realpath(dest_dir)

            for zfname in zf.namelist():
                uni_zfname = ""
                try:
                    zf_info = zf.getinfo(zfname)

                    # Directory entries carry no content of their own.
                    if zf_info.orig_filename.endswith(('\\', '/')):
                        continue

                    char_enc_guessed = translate_str(zfname)
                    uni_zfname = char_enc_guessed['converted']

                    if char_enc_guessed['encoding'] == 'unknown':
                        uni_zfname = f"unknown_charset_filename_{unknown_charset_counter}"
                        unknown_charset_counter += 1

                    # os.path.join discards dest_dir when the second part is
                    # absolute, so drop every leading slash.
                    uni_zfname = uni_zfname.lstrip("/")

                    unzipped_filename = os.path.normpath(os.path.join(dest_dir, uni_zfname))

                    # Entry names come from an untrusted archive: never let
                    # one write outside the destination. The content is kept
                    # under a neutral placeholder name instead.
                    if not _inside_dest(dest_root, unzipped_filename):
                        self.log.info(f"Archive entry escapes the extraction directory, "
                                      f"renaming it: {uni_zfname}")
                        uni_zfname = f"unknown_charset_filename_{unknown_charset_counter}"
                        unknown_charset_counter += 1
                        unzipped_filename = os.path.normpath(os.path.join(dest_dir, uni_zfname))

                    zf_content = zf.read(zfname)

                    parent_dir = os.path.dirname(unzipped_filename)
                    if not os.path.exists(parent_dir):
                        os.makedirs(parent_dir)

                    try:
                        out_file = open(unzipped_filename, 'wb')
                    except Exception:
                        # just in case there was invalid char ...
                        uni_zfname = f"unknown_charset_filename_{unknown_charset_counter}"
                        unknown_charset_counter += 1
                        unzipped_filename = os.path.normpath(os.path.join(dest_dir, uni_zfname))
                        out_file = open(unzipped_filename, 'wb')

                    with out_file:
                        out_file.write(zf_content)

                except Exception as e:
                    # filename is interpolated directly: encoding it here
                    # would log a bytes repr and could itself raise, hiding
                    # the original error.
                    self.log.exception(f"Failed at extracting files from the JAR "
                                       f"({filename} :: + {uni_zfname}). Error: {str(e)}")
                    return False

    except (IOError, zipfile.BadZipFile):
        self.log.info(f"Not a ZIP File or Corrupt ZIP File: {filename}")
        return False
    except NotJARException:
        self.log.info(f"Not a JAR File: {filename}")
        raise
    except Exception as e:
        self.log.exception(f"Caught an exception while analysing the file {filename}. [{e}]")
        return False

    return True