Ejemplo n.º 1
0
def inverse_need_type(text, need_type):
    """Re-encode ``text`` into the ``need_type`` encoding.

    Args:
        text: A byte string in an unknown encoding, or already-decoded
            (unicode) text.
        need_type: Name of the target codec, e.g. ``"utf-8"`` or ``"gbk"``.

    Returns:
        The content encoded as ``need_type``. The input is returned
        unchanged when it is empty or ``None``, when ``chardet`` cannot
        determine its encoding, or when the detected encoding already
        matches ``need_type`` (compared case-insensitively).

    Characters that cannot be decoded or encoded are silently dropped
    (``'ignore'`` error handler).
    """
    if not text:
        return text

    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
    if isinstance(text, type(u"")):
        # Decoded text has to be handled before chardet.detect(): the
        # detector works on byte strings and current chardet releases
        # reject anything else.
        return text.encode(need_type, 'ignore')

    coding_type = chardet.detect(text)['encoding']
    if coding_type is None:
        # Detection failed; decode(None, ...) would raise, so pass through.
        return text
    if coding_type.lower() == need_type.lower():
        return text
    return text.decode(coding_type, 'ignore').encode(need_type, 'ignore')
Ejemplo n.º 2
0
def inverse_need_type(text, need_type):
    """Convert ``text`` to the encoding named by ``need_type``.

    Args:
        text: Either a byte string whose encoding is unknown, or text
            that has already been decoded (unicode).
        need_type: Target codec name such as ``"utf-8"`` or ``"gbk"``.

    Returns:
        ``text`` encoded as ``need_type``. Empty or ``None`` input is
        returned as-is. So is a byte string whose encoding ``chardet``
        cannot detect, or whose detected encoding equals ``need_type``
        ignoring case.

    Undecodable or unencodable characters are dropped, because both
    conversions use the ``'ignore'`` error handler.
    """
    if not text:
        return text

    # Evaluates to ``unicode`` on Python 2 and ``str`` on Python 3.
    decoded_type = type(u"")
    if isinstance(text, decoded_type):
        # Must run before detection: chardet.detect() is meant for byte
        # strings and current releases raise on decoded text.
        return text.encode(need_type, 'ignore')

    detected = chardet.detect(text)['encoding']
    # ``None`` means chardet could not identify the encoding; decoding
    # with a ``None`` codec would raise, so leave the bytes untouched.
    if detected is None or detected.lower() == need_type.lower():
        return text
    return text.decode(detected, 'ignore').encode(need_type, 'ignore')
Ejemplo n.º 3
0
def format_text(text):
    """Extract the 11th tab-separated field of each line, re-encoded as GBK.

    ``text`` is split on newlines; the piece after the last newline is not
    processed. The source encoding is detected with ``chardet`` from the
    second split element and applied to every processed line. Each
    extracted field is decoded, re-encoded as GBK (unconvertible
    characters are dropped) and terminated with a newline. The combined
    result is written to ``./data/test.txt`` and also returned.

    Args:
        text: Newline-separated records with tab-separated fields.

    Returns:
        The concatenated converted fields, one per line. Empty when
        ``text`` contains no newline.

    Raises:
        IndexError: If a processed line has fewer than 11 fields.
    """
    lines = text.split("\n")
    # Same bound as the original ``range(0, len - 1)``: skip the last piece.
    rows = lines[:-1]

    converted = []
    if rows:
        # A non-empty ``rows`` guarantees lines[1] exists.
        encode_type = chardet.detect(lines[1])
        print(encode_type)
        source_encoding = encode_type['encoding']
        for row in rows:
            column = row.split("\t")[10]
            converted.append(
                column.decode(source_encoding, "ignore").encode("gbk", "ignore"))

    result = "".join(piece + "\n" for piece in converted)

    # The original called file.close() before ``file`` was ever assigned,
    # which always raised UnboundLocalError; ``with`` closes the handle.
    with open('./data/test.txt', 'w') as out_file:
        out_file.write(result)
    return result
Ejemplo n.º 4
0
def format_text(text):
    """Pull column index 10 out of every record and convert it to GBK.

    The input is split on ``"\\n"`` and every piece except the last is
    treated as a tab-separated record. ``chardet`` guesses the source
    encoding from the second piece; that guess is used to decode field 10
    of each record, which is then encoded as GBK with unconvertible
    characters ignored. The converted fields, each followed by a newline,
    are written to ``./data/test.txt`` and returned.

    Args:
        text: Newline-separated records with tab-separated fields.

    Returns:
        The converted fields joined into one string; empty when ``text``
        has no newline, in which case no detection is attempted.

    Raises:
        IndexError: If a record has fewer than 11 tab-separated fields.
    """
    pieces = text.split("\n")
    records = pieces[:-1]  # the trailing piece is never processed

    output_parts = []
    if records:
        # pieces[1] is safe here: one record implies at least two pieces.
        detection = chardet.detect(pieces[1])
        print(detection)
        source_codec = detection['encoding']
        for record in records:
            fields = record.split("\t")
            gbk_field = fields[10].decode(
                source_codec, "ignore").encode("gbk", "ignore")
            output_parts.append(gbk_field)
            output_parts.append("\n")

    result = "".join(output_parts)

    # Replaces a stray close() on a not-yet-assigned local (an
    # UnboundLocalError on every call) and the manual open/close pair.
    with open('./data/test.txt', 'w') as handle:
        handle.write(result)
    return result
Ejemplo n.º 5
0
def getTextEncoding(text):
    """Return the ``'encoding'`` entry of ``chardet.detect(text)``."""
    return chardet.detect(text)['encoding']
Ejemplo n.º 6
0
def getTextEncoding(text):
    """Run ``chardet`` detection on ``text`` and return its encoding guess.

    The returned value is whatever ``chardet.detect`` stores under the
    ``'encoding'`` key of its result.
    """
    detection = chardet.detect(text)
    return detection['encoding']