#coding=utf-8
# Print the value stored in the last column of every row, for every sheet of
# the workbook opened below -- one value per output line.
from b2 import system2
import xlrd

# NOTE(review): presumably switches the interpreter's default encoding to
# UTF-8 so non-ASCII cell values print without errors -- confirm in b2.system2.
system2.reload_utf8()

workbook = xlrd.open_workbook("texts/成都女司机data_3.xlsx")
for sheet in workbook.sheets():
    # Index of the right-most column of this sheet.
    last_col = sheet.ncols - 1
    for row_idx in xrange(sheet.nrows):
        cell = sheet.cell(row_idx, last_col)
        # Single-argument print(): same output as the Python 2 print statement.
        print(cell.value)
#coding=utf-8 import sys import os import re import collections import json import math from b2 import file2 from optparse import OptionParser from b2 import system2 system2.reload_utf8() def print_msg(msg): sys.stdout.write("%s\n" % msg.encode("utf-8")) def word_split(input_file_path, save_path, ngram=2): word_freq = collections.defaultdict(int) if input_file_path == "stdin": inputs = sys.stdin else: inputs = file2.FilesRead([input_file_path]) for line in inputs: words = line.rstrip().replace(",", " ").replace("。", " ").split() for word in words: try: word = word.decode("utf-8") except Exception: