def test_translate_str():
    """Check the encoding and language that translate_str reports for raw bytes."""
    # Byte strings whose reported encoding is expected to be UTF-8.
    utf8_samples = (
        b"\xf1\x90\x80\x80\xc2\x90",
        b"fran\xc3\xa7ais \xc3\xa9l\xc3\xa8ve",
    )
    for sample in utf8_samples:
        assert translate_str(sample)['encoding'] == "utf-8"

    # Byte string whose reported language is expected to be Japanese.
    japanese_sample = (b'\x83G\x83\x93\x83R\x81[\x83f\x83B\x83\x93\x83O\x82'
                       b'\xcd\x93\xef\x82\xb5\x82\xad\x82\xc8\x82\xa2')
    assert translate_str(japanese_sample)['language'] == "Japanese"
Beispiel #2
0
def test_translate_str():
    """Check translate_str on ISO-8859-5 encoded bytes and on a plain str."""
    # Cyrillic text encoded as ISO-8859-5 bytes.
    cyrillic_bytes = 'Стамболийски'.encode('ISO-8859-5')
    detected = str_utils.translate_str(cyrillic_bytes)
    for key, expected in (('language', 'Bulgarian'), ('encoding', 'ISO-8859-5')):
        assert detected[key] == expected

    # A str (not bytes) input: language is reported as unknown, encoding as utf-8.
    detected = str_utils.translate_str('abcdéfg')
    for key, expected in (('language', 'unknown'), ('encoding', 'utf-8')):
        assert detected[key] == expected
Beispiel #3
0
    def parse_comp_obj(self, field, res):
        """Copy the ``user_type`` and ``prog_id`` values of ``field`` into ``res.body``.

        Each of the two values is handled independently: when a
        ``MissingField`` is raised while reading one of them, that value is
        simply left out of ``res.body``.
        """
        try:
            self.cache_fields(field, res)
            type_field = field["user_type"]
            # Re-encode the value with the field's own charset and let
            # translate_str guess the encoding of the resulting bytes.
            guess = translate_str(type_field.value.encode(type_field.charset))

            res.body['user_type'] = guess['converted']
            res.body['user_type_encoding'] = guess['encoding']
        except MissingField:
            # Nothing to record for user_type.
            pass

        try:
            prog_id_field = field['prog_id']
            res.body['prog_id'] = prog_id_field.value
        except MissingField:
            # Nothing to record for prog_id.
            pass
Beispiel #4
0
    def jar_extract(self, filename, dest_dir):
        """Extract the file entries of the archive ``filename`` into ``dest_dir``.

        Entries whose original name ends with a slash or backslash are treated
        as directories and skipped. Each remaining name is passed through
        ``translate_str``. The entry is written as
        ``unknown_charset_filename_<n>`` directly inside ``dest_dir`` when the
        reported encoding is ``'unknown'``, when the resulting path would land
        outside ``dest_dir``, or when the target file cannot be opened.

        Args:
            filename: Archive to open with ``zipfile.ZipFile``.
            dest_dir: Directory that receives the extracted files.

        Returns:
            True when every entry was processed, False when the archive could
            not be opened or any entry failed to extract.

        Raises:
            NotJARException: re-raised unchanged if it reaches the outer handler.
        """
        try:
            # Resolved once so every entry is checked against the same root.
            dest_root = os.path.realpath(dest_dir)

            with zipfile.ZipFile(filename, "r") as zf:
                unknown_charset_counter = 0
                for zfname in zf.namelist():
                    uni_zfname = ""
                    try:
                        zf_info = zf.getinfo(zfname)
                        if zf_info.orig_filename.endswith(('\\', '/')):
                            # Directory entry: nothing to write.
                            continue

                        # NOTE(review): assumes translate_str returns a dict whose
                        # 'converted' value is a str usable as a path component.
                        char_enc_guessed = translate_str(zfname)
                        uni_zfname = char_enc_guessed['converted']

                        if char_enc_guessed['encoding'] == 'unknown':
                            uni_zfname = f"unknown_charset_filename_{unknown_charset_counter}"
                            unknown_charset_counter += 1

                        # os.path.join discards dest_dir when the second part is
                        # absolute, so drop every leading slash.
                        uni_zfname = uni_zfname.lstrip("/")

                        unzipped_filename = os.path.normpath(os.path.join(dest_dir, uni_zfname))

                        # Archive member names are untrusted: refuse any target
                        # that resolves outside dest_dir (e.g. via "..").
                        try:
                            resolved_target = os.path.realpath(unzipped_filename)
                            escapes = os.path.commonpath([dest_root, resolved_target]) != dest_root
                        except ValueError:
                            # Unresolvable name (e.g. embedded NUL) or paths that
                            # share no common root: treat as unsafe.
                            escapes = True

                        if escapes:
                            self.log.warning(f"Entry {uni_zfname} of {filename} resolves outside of "
                                             f"{dest_dir}; extracting it under a generated name")
                            uni_zfname = f"unknown_charset_filename_{unknown_charset_counter}"
                            unknown_charset_counter += 1
                            unzipped_filename = os.path.normpath(os.path.join(dest_dir, uni_zfname))

                        zf_content = zf.read(zfname)

                        if not os.path.exists(os.path.dirname(unzipped_filename)):
                            os.makedirs(os.path.dirname(unzipped_filename))

                        try:
                            out_file = open(unzipped_filename, 'wb')
                        except (OSError, ValueError):
                            # just in case there was an invalid char in the name
                            uni_zfname = f"unknown_charset_filename_{unknown_charset_counter}"
                            unknown_charset_counter += 1
                            unzipped_filename = os.path.normpath(os.path.join(dest_dir, uni_zfname))
                            out_file = open(unzipped_filename, 'wb')

                        with out_file:
                            out_file.write(zf_content)
                    except Exception as e:
                        # Any failure on a single entry aborts the whole extraction.
                        self.log.exception(f"Failed at extracting files from the JAR "
                                           f"({filename} :: + {uni_zfname}). Error: {str(e)}")
                        return False

        except (IOError, zipfile.BadZipfile):
            self.log.info(f"Not a ZIP File or Corrupt ZIP File: {filename}")
            return False
        except NotJARException:
            self.log.info(f"Not a JAR File: {filename}")
            raise
        except Exception as e:
            self.log.exception(f"Caught an exception while analysing the file {filename}. [{e}]")
            return False

        return True