Python detectの例、pip._vendor.chardet.detect Pythonの例

コード例 #1

0

ファイルを表示

def upload_file():
    """Handle an upload form submission and run detection on the stored image.

    On a POST request the ``file`` part is validated, saved under
    ``app.config['UPLOAD_FOLDER']``, reopened as an RGB image and handed to
    ``detect``; the client is then redirected to ``/response``.  Every
    validation failure flashes a message and redirects back to the request
    URL.  Any other request method falls through and returns ``None``.
    """
    if request.method != 'POST':
        return None

    # The POST request must contain a "file" part at all.
    if 'file' not in request.files:
        flash('No file part')
        return redirect(request.url)

    upload = request.files['file']

    # An empty filename means no file was chosen for upload.
    if upload.filename == '':
        flash('No file selected for uploading')
        return redirect(request.url)

    # Reject anything that is missing or has a disallowed name.
    if not (upload and allowed_file(upload.filename)):
        flash('Allowed file types are txt, pdf, png, jpg, jpeg, gif')
        return redirect(request.url)

    filename = secure_filename(upload.filename)  # sanitise the client name
    upload_dir = app.config['UPLOAD_FOLDER']
    img_path = os.path.join(upload_dir) + "/" + filename
    upload.save(os.path.join(upload_dir, filename))  # store the file
    flash('Datoteka se je uspesno nalozila')  # user feedback

    original_image = Image.open(img_path, mode='r').convert('RGB')
    detect(original_image,
           img_path,
           min_score=0.2,
           max_overlap=0.5,
           top_k=200)
    return redirect('/response?file=' + filename)

コード例 #2

0

ファイルを表示

    def loadAllHeroURL(self):
        """Fetch the hero list and populate ``self.heroIdList``.

        Downloads ``self.allHeroURL`` through ``self.requestHelper``, decodes
        the bytes with the encoding guessed by chardet, and evaluates the
        object literal that follows the second ``=`` in the script (with its
        last character, the trailing ``;``, removed).  ``self.version`` and
        ``self.updated`` are copied from that data.  For every ``key: value``
        pair in ``data['keys']`` the first ``'name'`` / ``'title'`` pair found
        after ``key`` in ``str(data['data'])`` is stored as
        ``self.heroIdList[value] = "<name>-<title>"``.
        """
        raw = self.requestHelper(self.allHeroURL)
        # Detect the character set of the payload, then decode it.
        from pip._vendor import chardet
        # detect() can report an encoding of None; decode() would then raise
        # TypeError, so fall back to UTF-8.
        encoding = chardet.detect(raw)['encoding'] or 'utf-8'
        print(encoding)
        content = raw.decode(encoding=encoding)
        # Cut the JS down to the object literal: drop everything before the
        # second '=' and the final ';'.
        chapionListData = content.split('=')[2][:-1]
        # SECURITY NOTE(review): eval() executes text downloaded from the
        # network.  Consider json.loads / ast.literal_eval once the payload
        # format is confirmed.
        data = eval(chapionListData)
        heroNameList = data['keys']
        heroDescri = str(data['data'])  # detailed hero names and images
        self.version = data['version']
        self.updated = data['updated']
        # Register each hero under its display name.
        for key, value in heroNameList.items():
            # Pull the localized name and title out of the description text.
            # The key is escaped so it is always matched literally.
            pattern = re.compile(
                re.escape(key) +
                r'[\s\S]*?\'name\': \'(.*?)\'[\s\S]*?\'title\': \'(.*?)\'',
                re.S)
            found = pattern.search(heroDescri)
            if found is None:
                # No description for this key: skip it instead of failing
                # with IndexError on an empty match list.
                continue
            self.heroIdList[value] = found.group(1) + "-" + found.group(2)

コード例 #3

0

ファイルを表示

def load_data():
    """Load the first file returned by ``get_vsvs()`` into an ordered mapping.

    The encoding is guessed by chardet from the first 70 bytes of the file.
    The file is read with ``csv.reader``; the first field of each row is split
    on tabs.  The first non-empty row supplies the column titles, and each
    later non-empty row becomes a ``{title: value}`` dict stored in
    ``row_dict`` under the value of its first column.

    Returns:
        ``(titles, row_dict)`` for the first file processed.  If ``get_vsvs()``
        yields nothing the function falls through and returns ``None``.
    """
    csvs = get_vsvs()
    for csv_name in csvs:
        with open(csv_name, 'rb') as fin:
            encoding_type = chardet.detect(fin.read(70))['encoding']

        row_dict = OrderedDict()
        titles = []
        with open(csv_name, newline='', encoding=encoding_type) as f:
            reader = csv.reader(f)
            for row in reader:
                if not row:
                    # csv.reader yields [] for a blank line; row[0] would
                    # raise IndexError.
                    continue
                titles = row[0].split('\t')
                break
            print('titles=', titles)

            for row in reader:
                if not row:
                    continue  # skip blank lines between records
                print('row==', row)
                title_row = row[0]
                print('split row ', row)
                row = title_row.split('\t')
                print('22split row ', row)

                d = {}
                for index, value in enumerate(row):
                    print(index)
                    d[titles[index]] = value
                row_dict[d.get(titles[0])] = d

            print(row_dict)
            # NOTE(review): this return sits inside the per-file loop, so only
            # the first CSV is ever loaded -- confirm that is intended.
            return titles, row_dict

コード例 #4

0

ファイルを表示

ファイル: lexer.py プロジェクト: ZekriSara/pfe

    def get_tokens(self, text, unfiltered=False):
        """
        Tokenize `text` and return an iterable of (tokentype, value) pairs.

        Bytes input is decoded first according to `self.encoding` (with the
        special values ``'guess'`` and ``'chardet'``); a leading BOM is
        dropped.  Line endings are normalised to ``\\n``, then the text is
        stripped, tab-expanded and newline-terminated as configured on the
        lexer.  Registered filters are applied to the token stream unless
        `unfiltered` is true.
        """
        if isinstance(text, str):
            if text.startswith('\ufeff'):
                text = text[len('\ufeff'):]
        elif self.encoding == 'guess':
            text, _ = guess_decode(text)
        elif self.encoding == 'chardet':
            try:
                from pip._vendor import chardet
            except ImportError as e:
                raise ImportError(
                    'To enable chardet encoding guessing, '
                    'please install the chardet library '
                    'from http://chardet.feedparser.org/') from e
            # A BOM, if present, decides the encoding.
            for bom, encoding in _encoding_map:
                if text.startswith(bom):
                    text = text[len(bom):].decode(encoding, 'replace')
                    break
            else:
                # No BOM: let chardet guess from the first 1KB.
                guess = chardet.detect(text[:1024])
                text = text.decode(guess.get('encoding') or 'utf-8', 'replace')
        else:
            text = text.decode(self.encoding)
            if text.startswith('\ufeff'):
                text = text[len('\ufeff'):]

        # From here on `text` is a unicode string.
        text = text.replace('\r\n', '\n').replace('\r', '\n')
        if self.stripall:
            text = text.strip()
        elif self.stripnl:
            text = text.strip('\n')
        if self.tabsize > 0:
            text = text.expandtabs(self.tabsize)
        if self.ensurenl and not text.endswith('\n'):
            text += '\n'

        def token_pairs():
            # Drop the position index that get_tokens_unprocessed yields.
            for _index, tokentype, value in self.get_tokens_unprocessed(text):
                yield tokentype, value

        if unfiltered:
            return token_pairs()
        return apply_filters(token_pairs(), self.filters, self)

コード例 #5

0

ファイルを表示

 def autoTransformEncoding(self, x):
     """Re-encode the byte string ``x`` as UTF-8.

     The source encoding is guessed with ``chardet.detect``; when no guess is
     available the interpreter default encoding is used.  Undecodable bytes
     are ignored.

     Returns:
         ``''`` when ``x`` is ``None`` or ``''``, otherwise the UTF-8 encoded
         bytes of the decoded input.
     """
     if x is None or x == '':
         return ''
     defaultEncoding = sys.getdefaultencoding()
     # detect() may report an encoding of None, so use `or`, not a dict
     # default, to apply the fallback.
     infoencode = chardet.detect(x).get('encoding') or defaultEncoding
     # The return used to sit inside the "encoding is None" branch, so the
     # method returned None whenever an encoding was detected.
     return x.decode(infoencode, 'ignore').encode('utf-8')

コード例 #6

0

ファイルを表示

def upload_image():
    """Store a base64-encoded image from the form, run detection, return it.

    Reads the ``base64`` (image payload) and ``ImageName`` (file name) form
    fields, writes the decoded bytes into ``app.config['UPLOAD_FOLDER']``,
    opens the file as an RGB image, passes it to ``detect`` and finally
    returns the stored file with ``send_file``.
    """
    encoded_img = request.form['base64']  # form field with the image payload
    # The name comes from the client: keep only its final path component so
    # values such as "../../x" cannot write outside the upload folder.
    filename = os.path.basename(request.form['ImageName'])
    img_data = base64.b64decode(encoded_img)  # base64 text -> raw image bytes
    image_path_name = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    with open(image_path_name, 'wb') as f:  # binary write
        f.write(img_data)
    original_image = Image.open(image_path_name, mode='r').convert('RGB')
    detect(original_image,
           image_path_name,
           min_score=0.2,
           max_overlap=0.5,
           top_k=200)
    return send_file(image_path_name)

コード例 #7

0

ファイルを表示

ファイル: test_artists.py プロジェクト: mkain0/playing-with-python

def test_get_artist(app, authorization_header):
    """GET artist 1 and compare the status and JSON body with the fixture."""
    response = app.get(
        '/music-archive/api/v1/artists/1',
        headers=authorization_header,
        follow_redirects=True,
    )
    # Decode the raw body with whatever encoding chardet detects for it.
    body = response.data
    detected = chardet.detect(body)["encoding"]
    json_data = json.loads(body.decode(detected))
    expected_json = load_json('single_artist.json')

    assert response.status_code == 200
    assert json_data == expected_json

コード例 #8

0

ファイルを表示

    def test_detail_page(self):
        """Print a Chinese sample string re-encoded as UTF-8.

        NOTE(review): this is Python 2 code -- the ``ur''`` literal prefix,
        ``unicode()`` and ``str.decode`` do not exist in Python 3.
        """
        # Result of this substitution is discarded.
        re.sub(ur'.*[\u4E00-\u9FA5]+.*', '', unicode('中文'))
        # The four patterns below are compiled but never used in this method.
        # Presumably they target English, Chinese, Japanese and Korean text
        # respectively -- TODO confirm the intended use.
        epre = re.compile(r"[\s\w]+")
        chre = re.compile(ur".*[\u4E00-\u9FA5]+.*")
        jpre = re.compile(ur".*[\u3040-\u30FF\u31F0-\u31FF]+.*")
        hgre = re.compile(ur".*[\u1100-\u11FF\u3130-\u318F\uAC00-\uD7AF]+.*")

        x = '中文'
        defaultEncoding = sys.getdefaultencoding()
        # The default only applies if the 'encoding' key is absent from the
        # detection result.
        infoencode = chardet.detect(x).get('encoding', defaultEncoding)

        # Decode with the detected encoding (ignoring bad bytes), then print
        # the UTF-8 encoded form.
        print(x.decode(infoencode, 'ignore').encode('utf-8'))

コード例 #9

0

ファイルを表示

ファイル: main.py プロジェクト: oucbjy/ebdcsqltool

def turn(file):
    """Convert a text file in place to UTF-8 with LF line endings.

    The encoding is guessed with chardet from the whole file.  The file is
    rewritten only when it contains CRLF line endings or when the detected
    encoding is neither ``'utf-8'`` nor ``'ascii'``; a message is printed
    after a successful rewrite.

    Args:
        file: Path of the file to normalise.
    """
    # Read everything and close the handle before the file is reopened for
    # writing; the write used to happen while the read handle was still open.
    with open(file, 'rb') as f:
        data = f.read()

    encoding = chardet.detect(data)['encoding']
    if encoding is None:
        # Empty or undetectable content: decode(None) would raise TypeError.
        print(f"{file}: encoding could not be detected, skipped")
        return

    data_str = data.decode(encoding)
    tp = 'LF'
    if '\r\n' in data_str:
        tp = 'CRLF'
        data_str = data_str.replace('\r\n', '\n')
    if encoding not in ['utf-8', 'ascii'] or tp == 'CRLF':
        # newline='\n' stops Python from translating line endings on write.
        with open(file, 'w', newline='\n', encoding='utf-8') as out:
            out.write(data_str)
        print(f"{file}: ({tp},{encoding}) trun to (LF,utf-8) success!")

コード例 #10

0

ファイルを表示

from urllib import request
import re

from pip._vendor import chardet

msg = "master msg"

# Fetch the page; the context manager closes the HTTP response.
with request.urlopen('https://coding.imooc.com/') as req:
    html = req.read()
encode_type = chardet.detect(html)
# detect() may report an encoding of None; fall back to UTF-8 instead of
# letting decode(None) raise TypeError.
html = html.decode(encode_type['encoding'] or 'utf-8')
img_urls = re.findall(r'src=.+\.jpg', html)
i = 0
for url in img_urls:
    url = str(url).replace("src=\"", "http:")
    # Download first so a failed request does not leave an empty file behind.
    with request.urlopen(url) as req:
        img_file = req.read()
    # Images must be stored in binary mode; `with` closes the file, which the
    # bare open() never did.
    with open(str(i) + ".jpg", 'wb+') as f:
        f.write(img_file)
    i += 1

a = "master_1_3"

コード例 #11

0

ファイルを表示

def check_encoding_of_file(name):
    """Return the encoding chardet guesses for the file ``name + ".csv"``."""
    csv_path = name + ".csv"
    # Binary mode: chardet works on the raw bytes.
    with open(csv_path, "rb") as handle:
        raw_bytes = handle.read()
    return chardet.detect(raw_bytes)['encoding']

コード例 #12

0

ファイルを表示

# 5. Выполнить пинг веб-ресурсов yandex.ru, youtube.com и преобразовать результаты из байтового
# в строковый тип на кириллице.
import subprocess

from pip._vendor import chardet

# args = ["ping","yandex.ru"]
# sub_ping = subprocess.Popen(args,stdout=subprocess.PIPE)
# for line in sub_ping.stdout:
#     result = chardet.detect(line)
#     line = line.decode(result["encoding"]).encode("utf-8")
#     print(line.decode("utf-8"))



args = ["ping", "youtube.com"]
# The context manager closes the stdout pipe and waits for the child process,
# which the bare Popen call never did.
with subprocess.Popen(args, stdout=subprocess.PIPE) as sub_ping:
    for line in sub_ping.stdout:
        result = chardet.detect(line)
        # detect() may report an encoding of None; fall back to UTF-8 and
        # replace undecodable bytes so one odd line cannot abort the loop.
        line = line.decode(result["encoding"] or "utf-8", "replace").encode("utf-8")
        print(line.decode("utf-8"))

コード例 #13

0

ファイルを表示

#-*-coding:utf-8-*-
import urllib
from pip._vendor import chardet

if __name__ == '__main__':
    # A bare `import urllib` does not load the `urllib.request` submodule, so
    # import it explicitly before use.
    import urllib.request

    url = "http://www.baidu.com"
    # The context manager closes the HTTP response after reading.
    with urllib.request.urlopen(url) as req:
        html = req.read()
    cs = chardet.detect(html)
    print(type(cs))
    # The 'encoding' key is present even when detection fails (value None),
    # so a dict default never applies; use `or` for the UTF-8 fallback.
    html = html.decode(cs.get("encoding") or "utf-8")
    print(html)

コード例 #14

0

ファイルを表示

def file_name_is_legal(name):
    """Return True if ``name`` is detected as ASCII or ends with ``.py``."""
    guessed = chardet.detect(name.encode('utf-8'))['encoding']
    if guessed == 'ascii':
        return True
    return name.endswith('.py')

コード例 #15

0

ファイルを表示

def get_encoding(file):
    """Return the encoding chardet detects for the file at ``file``."""
    # Read in binary mode so chardet receives the raw bytes.
    with open(file, 'rb') as stream:
        payload = stream.read()
        guess = chardet.detect(payload)
    return guess['encoding']