Python Pinyin.get_pinyinの例、xpinyin.Pinyin.get_pinyin Pythonの例

コード例 #1

0

ファイルを表示

def article_cat_save():
    """Update an existing article category from the submitted form.

    All fields are read from ``request.form``.  The ``dir`` slug is taken
    from the form unless the ``check`` box is set or ``dir`` is empty, in
    which case it is generated from the pinyin of ``cat_name``.  On success
    the row matching ``cat_id`` is updated and the client is redirected to
    the category list.  When form validation fails the function falls
    through and returns ``None``.
    """
    form = Article_cat(request.form)
    converter = Pinyin()
    if not form.validate():
        return None

    posted = request.form
    cat_id = posted.get('cat_id')

    # Use the submitted slug only when auto-generation was not requested
    # and a slug was actually supplied.
    slug = posted.get('dir')
    if posted.get('check') or not slug:
        slug = converter.get_pinyin(posted.get('cat_name'), '')

    changes = {
        Articles_Cat.parent_id: posted.get('parent_id'),
        Articles_Cat.cat_name: posted.get('cat_name'),
        Articles_Cat.dir: slug,
        Articles_Cat.keywords: posted.get('keywords'),
        Articles_Cat.description: posted.get('description'),
        Articles_Cat.cat_sort: posted.get('cat_sort'),
        Articles_Cat.status: posted.get('status'),
    }
    Articles_Cat.query.filter(Articles_Cat.cat_id == cat_id).update(changes)
    db.session.commit()
    return redirect(url_for('admin.article_cat_list'))

コード例 #2

0

ファイルを表示

    def generate_hostname(self):
        """Build the next free hostname for the instance with this serial number.

        The instance name is split on ``-``; the first part is converted to
        pinyin and combined with the second and third parts to form the
        prefix ``<pinyin>-<part1>-<part2>-``.  The hostname is that prefix
        followed by a zero-padded three-digit sequence number and
        ``.meetyima.com``.

        Returns:
            tuple: ``(hostname, innerIP, pubIP, status, instanceid)``.

        Raises:
            ValueError: if the instance has an empty ``InstanceName``.
            IndexError: if no instance matches the serial number, an IP
                list is empty, or the name has fewer than three parts.
        """
        pinyin = Pinyin()

        # Fetch the row once instead of indexing the queryset for every
        # attribute (each index on an unevaluated queryset may hit the DB).
        instance = Instance.objects.filter(SerialNumber=self.serialnumber)[0]
        info = instance.InstanceName
        innerIP = instance.InnerIpAddress[0]
        pubIP = instance.PublicIpAddress[0]
        status = instance.Status
        instanceid = instance.InstanceId
        if not info:
            raise ValueError('instance has no InstanceName')

        info = info.split("-")
        prefix = pinyin.get_pinyin(info[0], '') + '-' + info[1] + '-' + info[2] + '-'

        # The dots are matched literally ([.]) so that e.g. "meetyimaXcom"
        # cannot be mistaken for an existing host.
        # NOTE(review): the prefix itself is still interpolated unescaped.
        hostname_last = Server.objects.filter(
            hostname__regex=r'^%s[0-9]{3}[.]meetyima[.]com$' % prefix
        ).order_by('hostname').last()

        # If a host with the same prefix exists, use its id + 1; otherwise
        # start at 001.
        if hostname_last:
            hostname_last_id = str(int(hostname_last.hostname.split("-")[3].split(".")[0]) + 1).zfill(3)
        else:
            hostname_last_id = '001'
        hostname = prefix + hostname_last_id + '.' + 'meetyima.com'
        return hostname,innerIP,pubIP,status,instanceid

コード例 #3

0

ファイルを表示

def get_candidates(error_phrase):
    """Rank correction candidates for ``error_phrase`` by pinyin similarity.

    Candidates are the known words among the edits of ``error_phrase``.
    They are sorted into three buckets:

    * first order  - the whole pinyin equals that of ``error_phrase``;
    * second order - only the first syllable matches;
    * third order  - everything else.

    Returns:
        tuple: ``(first_order, second_order, third_order)`` lists.
    """
    candidates_1st_order = []
    candidates_2nd_order = []
    candidates_3rd_order = []

    p = Pinyin()
    # get_pinyin joins syllables with "-"; the comparison below splits on
    # "/".  The original called re.sub() and threw the result away, so the
    # split never separated syllables and the second bucket stayed empty.
    error_pinyin = p.get_pinyin(error_phrase).replace("-", "/")
    cn_words_dict = load_cn_words_dict(
        "HW10/Autochecker4Chinese-master/cn_dict.txt")
    candidate_phrases = list(known(edits1(error_phrase, cn_words_dict)))

    error_first_syllable = error_pinyin.split("/")[0]
    for candidate_phrase in candidate_phrases:
        candidate_pinyin = p.get_pinyin(candidate_phrase).replace("-", "/")
        if candidate_pinyin == error_pinyin:
            candidates_1st_order.append(candidate_phrase)
        elif candidate_pinyin.split("/")[0] == error_first_syllable:
            candidates_2nd_order.append(candidate_phrase)
        else:
            candidates_3rd_order.append(candidate_phrase)

    return candidates_1st_order, candidates_2nd_order, candidates_3rd_order

コード例 #4

0

ファイルを表示

  def decode(self,area):
    """Resolve a free-text ``area`` string against ``self.zonelevel``.

    Checks are tried in this order and the first hit is returned:

    1. a city name contained in ``area``            -> level "3"
    2. a province name contained in ``area``        -> level "2"
    3. the literal country name in ``area``         -> level "1"
    4. ``_<province pinyin>_<city pinyin>`` found in ``area``
       (case-insensitive); the text before the first "(" becomes the
       area name                                    -> level "4"

    When nothing matches, an all-empty result with level "4" is returned.
    """
    converter = Pinyin()
    print("\n\n\n\n\n zone_level_area  :"+area)

    # 1) city names
    for prov in self.zonelevel:
      for town in prov["children"]:
        town_name=town["value"]
        if town_name in area:
          return {"province":prov["value"],"city":town_name,"area":"","level":"3"}

    # 2) province names
    for prov in self.zonelevel:
      prov_name=prov["value"]
      if prov_name in area:
        return {"province":prov_name,"city":"","area":"","level":"2"}

    # 3) whole country
    if "中华人民共和国" in area:
      return {"province":"","city":"","area":"","level":"1"}

    # 4) pinyin marker of the form _province_city
    for prov in self.zonelevel:
      towns=prov["children"]
      prov_name=prov["value"]
      for town in towns:
        town_name=town["value"]
        marker="_"+converter.get_pinyin(prov_name,'')+"_"+converter.get_pinyin(town_name,'')
        needle=marker.lower()
        haystack=area.lower()
        print("city_name_py:"+needle+"  "+ haystack)
        if needle in haystack:
          return {"province":prov_name,"city":town_name,"area":area.split("(")[0],"level":"4"}

    return {"province":"","city":"","area":"","level":"4"}

コード例 #5

0

ファイルを表示

ファイル: welcome.py プロジェクト: luckistmaomao/webmaster

def welcome():
    options = parse_options()
    p = Pinyin()
    if options.name:
        username = options.name
        username = unicode(username)
        second_name = username[1:]
        pinyin = p.get_pinyin(username, " ").split()
        pinyin_abbrev = pinyin[0] + "".join(item[0] for item in pinyin[1:])
        message_format = u"{0}同学好，大家好，\n最近我们组迎来了{0}同学，欢迎他们加入我们温暖的大家庭！\n{1}同学的组内邮箱为： {2}@nlp.nju.edu.cn\n大家多联系，多关照。\n如果有任何问题，请联系管理员程善伯[email protected]或郁振庭[email protected]。\n另：组内为每位同学分配了一定资源，附件中为组内资源介绍。\n\n祝好！\n\n谢谢\n振庭"
        message = message_format.format(username, second_name, pinyin_abbrev)
        print message
    elif options.file:
        message_format = "大家好，\n最近我们组迎来了：\n{0}等{1}位同学同学。\n欢迎他们加入我们温暖的大家庭！\n大家多联系，多关照。\n如果有任何问题，请联系管理员\n另：组内为每位同学分配了一定资源，附件中为组内资源介绍以及组内编程规范，\n请仔细阅读。\n\n祝好！\n\n谢谢\n振庭"
        with codecs.open(options.file, encoding="utf-8") as infile:
            names = [line.strip() for line in infile]
        num_students = len(names)
        pinyins = [p.get_pinyin(name, " ").split() for name in names]
        pinyin_abbrevs = [pinyin[0] + "".join(item[0] for item in pinyin[1:]) for pinyin in pinyins]
        email_format = u"{0}({1}@nlp.nju.edu.cn)"
        emails = [email_format.format(name, pinyin_abbrev) for name, pinyin_abbrev in izip(names, pinyin_abbrevs)]
        message = message_format.format("\n".join(emails), num_students)
        print message
    else:
        print >> sys.stderr, "Please check your options"

コード例 #6

0

ファイルを表示

ファイル: tfidf_xgboost.py プロジェクト: sigmeta/xdf

def get_matrix_pinyin(pos_path="data/samples/positive.txt",
                      neg_path="data/samples/negative.txt"):
    """Build dense TF-IDF matrices over the pinyin of two sample files.

    Every line other than a bare newline is converted to pinyin (no
    separator between syllables) and split on whitespace to form one
    document.  A single dictionary and TF-IDF model are fitted on the
    positive and negative documents together.

    Returns:
        tuple: ``(pos_matrix, neg_matrix)``, each of shape
        ``(number of documents, dictionary size)``.
    """
    from xpinyin import Pinyin
    converter = Pinyin()

    def tokenize(path):
        # One token list per line of the file, skipping bare newlines.
        with open(path, encoding='utf8') as handle:
            return [
                converter.get_pinyin(text, '').split()
                for text in handle
                if text != '\n'
            ]

    documents = []
    documents.extend(tokenize(pos_path))
    pos_len = len(documents)
    print("positive matrix length", pos_len)

    documents.extend(tokenize(neg_path))
    neg_len = len(documents) - pos_len
    print("negative matrix length", neg_len)

    dct = Dictionary(documents)
    vocab_size = len(dct)
    print("dictionary length", vocab_size)

    corpus = [dct.doc2bow(doc) for doc in documents]
    model = TfidfModel(corpus)

    pos_matrix = np.zeros((pos_len, vocab_size))
    neg_matrix = np.zeros((neg_len, vocab_size))
    targets = (
        (pos_matrix, model[corpus][:pos_len]),
        (neg_matrix, model[corpus][pos_len:]),
    )
    for matrix, weighted_docs in targets:
        for row, weighted in enumerate(weighted_docs):
            for col, weight in weighted:
                matrix[row, col] = weight
    print("get matrix completed")
    return pos_matrix, neg_matrix

コード例 #7

0

ファイルを表示

ファイル: excel_write.py プロジェクト: Shuangtonglee/MT_LianJiaSpider

 def xiaoqu_write_into_excel(self):
     """Scrape residential-community data per district into one workbook.

     A sheet named after each district is created, the district name is
     converted to pinyin for ``xiaoqu.url``, and every record returned by
     ``xiaoqu.xiaoqu_data`` is appended below a header row.  The workbook
     is saved as ``上海链家小区数据.xlsx``.
     """
     regions = [
         '浦东', '闵行', '宝山', '徐汇', '普陀', '杨浦', '长宁', '松江', '嘉定', '黄浦', '静安',
         '闸北', '虹口', '青浦', '奉贤', '金山', '崇明', '上海周边'
     ]
     # regions = ['闵行', '虹口', '浦东']  # subset for testing
     p = Pinyin()
     filename = '上海链家小区数据.xlsx'
     wb = Workbook()
     for region_name in regions:
         print(region_name)
         sheet = wb.create_sheet(region_name)
         if region_name == '闵行':
             # Hard-coded slug for this district (presumably the converter
             # picks a different reading for it - TODO confirm).
             region_slug = 'minhang'
         else:
             # Convert the Chinese name to pinyin to match the URL format.
             region_slug = p.get_pinyin(region_name, '')
         urls = xiaoqu.url(region_slug)
         xiaoqu_list = xiaoqu.xiaoqu_data(urls)
         sheet.append(
             ['小区名称', '挂牌均价（元/平）', '在售数量(套)', '区域', '位置', '地铁', '建造时间'])
         for xq in xiaoqu_list:
             print(xq)
             sheet.append([
                 xq[0],
                 toNumber(xq[1]),
                 toNumber(xq[2]), xq[3], xq[4], xq[5], xq[6]
             ])
     wb.save(filename=filename)
     # NOTE(review): `ts` is not defined in this method; it must come from
     # an enclosing scope - confirm.
     print('Took {}s'.format(time() - ts))

コード例 #8

0

ファイルを表示

ファイル: tests.py プロジェクト: leafcoder/cpinyin

class PinyinTests(unittest.TestCase):
    """Checks ``xpinyin.Pinyin`` after ``cpinyin.install()`` has been called."""

    def setUp(self):
        # Imported lazily so that cpinyin is installed before xpinyin is
        # loaded.
        import cpinyin
        cpinyin.install()
        from xpinyin import Pinyin
        self.p = Pinyin()

    def test_get_pinyin_with_default_splitter(self):
        self.assertEqual(self.p.get_pinyin(u'上海'), u'shang-hai')

    def test_get_pinyin_with_splitter(self):
        self.assertEqual(self.p.get_pinyin(u'上海', splitter=u''), u'shanghai')

    def test_get_pinyin_mixed_words(self):
        self.assertEqual(self.p.get_pinyin(u'Apple发布iOS7', splitter=u'-'),
                         u'Apple-fa-bu-iOS7')

    def test_get_pinyin_with_tone_marks(self):
        self.assertEqual(self.p.get_pinyin(u'上海', tone_marks=u'marks'),
                         u'sh\xe0ng-h\u01cei')

    # Renamed: this used to share its name with the test above, which
    # replaced that test in the class so it never ran.
    def test_get_pinyin_with_tone_marks_single_char(self):
        self.assertEqual(self.p.get_pinyin(u'秋', tone_marks=u'marks'), u'qiū')

    def test_get_initial(self):
        self.assertEqual(self.p.get_initial(u'你'), u'N')

    def test_get_initials(self):
        self.assertEqual(self.p.get_initials(u'你好'), u'N-H')

    def test_get_initials_with_splitter(self):
        self.assertEqual(self.p.get_initials(u'你好', u' '), u'N H')
        self.assertEqual(self.p.get_initials(u'你好', u''), u'NH')

コード例 #9

0

ファイルを表示

ファイル: gender_classifier.py プロジェクト: hodsonjames/demographics

def process_east_asian_dataset():
    """Map romanised Chinese given names to a gender label.

    Names are read from ``data/female_c.txt`` and ``data/male_c.txt``
    (separated by spaces or tabs), converted to pinyin with the syllable
    hyphens removed, and stored as keys with the value ``'F'`` or ``'M'``.
    The doubled form of each key (``key + key``) is stored as well.  Female
    names are processed first and an existing key is never overwritten.

    Returns:
        dict: romanised name -> ``'F'`` / ``'M'``.
    """
    data = {}
    p = Pinyin()

    def read_names(path):
        # Collect every non-empty token of the file.
        collected = []
        with open(path) as f:
            for raw in f:
                flattened = raw.replace('\n', '').replace('\t', ' ')
                collected.extend(tok for tok in flattened.split(' ') if tok)
        return collected

    def romanize(name):
        return p.get_pinyin(name).replace('、', ' ').replace('-', '').replace('\n', '')

    def label(names, gender):
        # Convert the whole batch first, then record it; the first label
        # for a key wins.
        for key in [romanize(entry) for entry in names]:
            data.setdefault(key, gender)
            data.setdefault(key + key, gender)

    label(read_names('data/female_c.txt'), 'F')
    label(read_names('data/male_c.txt'), 'M')

    return data

コード例 #10

0

ファイルを表示

ファイル: convert.py プロジェクト: xzmeng/meishicheng

 def handle(self, *args, **options):
     """Import dishes from the local ``chinese_food`` MongoDB into Django.

     One ``Category`` is created per distinct food category (slug = pinyin
     of the name), five made-up ``Restaurant`` rows are created, and every
     food document becomes a ``Product`` with a random restaurant and a
     random price between 10 and 100.
     """
     pin = Pinyin()
     mongo_client = pymongo.MongoClient('mongodb://localhost:27017')
     db = mongo_client['chinese_food']

     category_names = list({food['category'] for food in db.food.find({})})

     # A handful of made-up restaurants.
     restaurant_names = ['阿坤私房菜', '橘子餐厅', '北欧时光·清真',
                         '辣一天川小館', '川人百味']

     categories = [
         Category(name=label, slug=pin.get_pinyin(label))
         for label in category_names
     ]
     restaurants = [Restaurant(name=label) for label in restaurant_names]
     for record in categories:
         record.save()
     for record in restaurants:
         record.save()

     for category in categories:
         for food in db.food.find({'category': category.name}):
             image_path = '/static/food_images/{}/{}.jpg'.format(
                 category.name, food['name'])
             product = Product.objects.create(
                 category=category,
                 name=food['name'],
                 slug=slugify(pin.get_pinyin(food['name'])),
                 image=image_path,
                 kouwei=food['kouwei'],
                 gongyi=food['gongyi'],
                 restaurant=random.choice(restaurants),
                 description=food['intro'] or 'no description.',
                 price=random.randint(10, 100))
             product.save()

コード例 #11

0

ファイルを表示

ファイル: evaluation.py プロジェクト: 0olinchaoo0/learngit

def pinyin(target, answer):
    """Report whether ``answer`` sounds the same as ``target``.

    Digits 1-9 in ``answer`` are first spelled out as Chinese characters,
    then both strings are converted to pinyin (no separator) and compared.
    The result is printed and also returned.

    Returns:
        bool: ``True`` when the two pinyin strings are equal.
    """
    p = Pinyin()

    dict_num2str = {
        '1': '一',
        '2': '俩',
        '3': '三',
        '4': '四',
        '5': '五',
        '6': '六',
        '7': '七',
        '8': '八',
        '9': '九'
    }

    # Replace digits with their Chinese spelling.
    answer_new = ''.join(dict_num2str.get(a_i, a_i) for a_i in answer)

    # Convert both sides to pinyin.  The original assigned the pinyin of
    # `target` to `answer_new`, discarding the normalised answer, and then
    # compared that value with itself - so the result was always True.
    target_new = p.get_pinyin(target, '')
    answer_new = p.get_pinyin(answer_new, '')

    matched = target_new == answer_new
    print(matched)
    return matched

コード例 #12

0

ファイルを表示

def inTitle(name):
    """Return quoted title-search variants for a name.

    The first two characters are taken as one unit when they appear in
    ``D``; otherwise only the first character is.  Four quoted strings are
    produced: the two orderings of the original parts and the two orderings
    of their pinyin, the latter separated by a space.

    Returns:
        list | None: the four variants, or ``None`` for an empty name.
    """
    converter = Pinyin()
    if len(name) == 0:
        return None

    # Length of the leading part: 2 when the two-character prefix is a
    # known entry in D, else 1.
    cut = 2 if name[:2] in D else 1
    last, first = name[:cut], name[cut:]

    L = converter.get_pinyin(last, '')
    F = converter.get_pinyin(first, '')

    return [
        '"' + first + last + '"',
        '"' + last + first + '"',
        '"' + L + " " + F + '"',
        '"' + F + " " + L + '"',
    ]

コード例 #13

0

ファイルを表示

ファイル: process_country_gender_zh.py プロジェクト: pandasasa/name

def convert_to_pinyin(name):
    """Romanise a Chinese name as ``"<Rest> <First>"``.

    The name is first passed through ``tradition2simple``.  The pinyin of
    everything after the first character comes first, followed by the
    pinyin of the first character; each part is capitalised.

    Raises:
        IndexError: if the converted name is empty.
    """
    name = tradition2simple(name)
    py = Pinyin()
    # str.capitalize() replaces string.capitalize(), which exists only in
    # Python 2; the result is the same.
    rest_part = py.get_pinyin(name[1:], '').capitalize()
    first_part = py.get_pinyin(name[0], '').capitalize()
    return ' '.join([rest_part, first_part])

コード例 #14

0

ファイルを表示

def name_submit(request):
    """Predict country and gender for the submitted name.

    The name is read from the GET or POST parameters.  The submission is
    stored as a ``NameInfo`` row on a best-effort basis.  Names containing
    CJK characters are converted to capitalised pinyin before prediction.

    Returns:
        JsonResponse: ``{'gender': 'NONAME'}`` when no name was supplied,
        otherwise ``{'country': ..., 'gender': 'MALE' | 'FEMALE'}``.

    This is Python 2 code (it relies on the ``unicode`` builtin).
    """
    import logging

    global GENDER_PREDICTOR, CHINESE_GENDER_PREDICTOR, COUNTRY_PREDICTOR

    ip = log_visitor_ip(request)
    request_data = {}
    if request.method == 'GET':
        request_data = request.GET
    elif request.method == 'POST':
        request_data = request.POST
    response_data = {}

    # Load the prediction models on first use.
    if not CHINESE_GENDER_PREDICTOR:
        CHINESE_GENDER_PREDICTOR = load_gender_predict_model(
            CHINESE_GENDER_MODEL_PATH)
    if not GENDER_PREDICTOR:
        GENDER_PREDICTOR = load_gender_predict_model(GENDER_MODEL_PATH)
    if not COUNTRY_PREDICTOR:
        COUNTRY_PREDICTOR = load_country_predict_model()

    # .get() so that a missing parameter (or an unsupported HTTP method)
    # reaches the NONAME answer instead of raising KeyError.
    target_name = request_data.get('name')

    if not target_name:
        return JsonResponse({'gender': 'NONAME'})
    try:
        name_info = NameInfo(name=target_name,
                             gender=request_data.get('gender', None),
                             country=request_data.get('country', None),
                             time=datetime.datetime.now(),
                             ip=ip)
        name_info.save()
    except Exception:
        # Storing the submission stays best-effort, but the failure is
        # logged rather than silently dropped.
        logging.getLogger(__name__).exception('could not store NameInfo')

    is_chinese = any(u'\u4e00' <= c <= u'\u9fff' for c in target_name)
    if is_chinese:
        py = Pinyin()
        target_name = ' '.join([
            py.get_pinyin(target_name[1:], '').capitalize(),
            py.get_pinyin(target_name[0], '').capitalize()
        ])
    if isinstance(target_name, unicode):
        target_name = target_name.encode('utf-8')
    ### Country Prediction
    country = COUNTRY_PREDICTOR.predict(target_name)
    response_data['country'] = country.capitalize()
    if country == 'china':
        is_chinese = True
    ### Gender Prediction
    if is_chinese:
        is_male = CHINESE_GENDER_PREDICTOR.predict(target_name)
    else:
        is_male = GENDER_PREDICTOR.predict(target_name)

    response_data['gender'] = 'MALE' if is_male else 'FEMALE'

    return JsonResponse(response_data)

コード例 #15

0

ファイルを表示

def article_cat_edit(id):
    """Show (GET) or apply (any other method) the edit form of a category.

    GET renders ``admin/articel_cat_edit.html`` with the category whose
    ``cat_id`` equals ``id`` and an HTML tree of all categories.  Otherwise
    the submitted form is validated and the matching row is updated; the
    ``dir`` slug is generated from the pinyin of ``cat_name`` when
    ``check`` is set or ``dir`` is empty.  Nothing is returned when
    validation fails.
    """
    if request.method == 'GET':
        current = Articles_Cat.query.filter_by(cat_id=id).first()
        # Load every category to build the parent selector.
        rows = [
            dict(cat_id=cat.cat_id,
                 parent_id=cat.parent_id,
                 cat_name=cat.cat_name)
            for cat in Articles_Cat.query.all()
        ]
        tree = build_tree(rows, 0, 0)
        html = build_table(tree, parent_title='顶级菜单')
        return render_template('admin/articel_cat_edit.html',
                               content=current,
                               message=html)

    form = Article_cat(request.form)
    converter = Pinyin()
    if not form.validate():
        return None

    posted = request.form
    parent_id = posted.get('parent_id')
    print(parent_id)
    cat_id = int(posted.get('cat_id'))

    # Keep the submitted slug only when auto-generation is off and a slug
    # was actually supplied.
    slug = posted.get('dir')
    if posted.get('check') or not slug:
        slug = converter.get_pinyin(posted.get('cat_name'), '')

    changes = {
        Articles_Cat.parent_id: parent_id,
        Articles_Cat.cat_name: posted.get('cat_name'),
        Articles_Cat.dir: slug,
        Articles_Cat.keywords: posted.get('keywords'),
        Articles_Cat.description: posted.get('description'),
        Articles_Cat.cat_sort: posted.get('cat_sort'),
        Articles_Cat.status: posted.get('status'),
    }
    Articles_Cat.query.filter(Articles_Cat.cat_id == cat_id).update(changes)
    db.session.commit()
    return redirect(url_for('admin.article_cat_list'))

コード例 #16

0

ファイルを表示

def article_cat_add():
    """Show (GET) or process (any other method) the add-category form.

    GET renders ``admin/article_cat.html`` with an HTML tree of all
    categories.  Otherwise the form is validated and a new ``Articles_Cat``
    row is inserted; the ``dir`` slug is generated from the pinyin of
    ``cat_name`` when ``check`` is set or ``dir`` is empty.  A validation
    failure returns a plain-text message.
    """
    if request.method == 'GET':
        # Load every category to build the parent selector.
        rows = [
            dict(cat_id=cat.cat_id,
                 parent_id=cat.parent_id,
                 cat_name=cat.cat_name)
            for cat in Articles_Cat.query.all()
        ]
        tree = build_tree(rows, 0, 0)
        print(tree)
        html = build_table(tree, parent_title='顶级菜单')
        return render_template('admin/article_cat.html',
                               message=html)  # article_cat.html

    form = Article_cat(request.form)
    converter = Pinyin()
    posted = request.form
    print(posted.get('dir'))
    if not form.validate():
        print("校验没有通过")
        return "校验没通过"

    # Keep the submitted slug only when auto-generation is off and a slug
    # was actually supplied.
    slug = posted.get('dir')
    if posted.get('check') or not slug:
        slug = converter.get_pinyin(posted.get('cat_name'), '')

    insert = Articles_Cat(parent_id=posted.get('parent_id'),
                          cat_name=posted.get('cat_name'),
                          dir=slug,
                          keywords=posted.get('keywords'),
                          description=posted.get('description'),
                          cat_sort=posted.get('cat_sort'),
                          status=posted.get('status'))
    db.session.add(insert)
    db.session.commit()

    return redirect(url_for('admin.article_cat_list'))

コード例 #17

0

ファイルを表示

ファイル: poi.py プロジェクト: lukySmile/tencent-location

def write_to_excel(poilist, cityname, classfield, coord):
    """Write POI records to ``data/poi/<city>-<class>.xls``.

    Args:
        poilist: sequence of dicts with the keys ``location`` ("lng,lat"),
            ``name``, ``address``, ``pname``, ``cityname``,
            ``business_area`` and ``type``.
        cityname: city used for the file name when ``poilist`` is empty
            (see the note in the loop).
        classfield: sheet name; its pinyin is also part of the file name.
        coord: ``"2"`` converts coordinates with ``gcj02_to_wgs84``,
            ``"3"`` with ``gcj02_to_bd09``; any other value keeps them.

    Returns:
        str: the path of the written file, relative to the working
        directory.
    """
    # A Workbook object corresponds to one Excel file.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet(classfield, cell_overwrite_ok=True)

    # First row: column titles.
    headers = ('lon', 'lat', 'name', 'address', 'pname', 'cityname',
               'business_area', 'type')
    for col, title in enumerate(headers):
        sheet.write(0, col, title)

    for row, poi in enumerate(poilist, start=1):
        location = poi['location']
        name = poi['name']
        address = poi['address']
        pname = poi['pname']
        # NOTE(review): this rebinds the `cityname` parameter, so the file
        # name below is derived from the last POI's city rather than from
        # the argument.  Kept as-is because callers may rely on it -
        # confirm whether that is intended.
        cityname = poi['cityname']
        business_area = poi['business_area']
        poi_type = poi['type']
        parts = str(location).split(",")
        lng = parts[0]
        lat = parts[1]

        if coord == "2":
            lng, lat = gcj02_to_wgs84(float(lng), float(lat))[:2]
        elif coord == "3":
            lng, lat = gcj02_to_bd09(float(lng), float(lat))[:2]

        # Write one row per POI.
        values = (lng, lat, name, address, pname, cityname, business_area,
                  poi_type)
        for col, value in enumerate(values):
            sheet.write(row, col, value)

    # Finally, save everything to the target Excel file.
    p = Pinyin()
    path = "data/poi/" + p.get_pinyin(cityname) + "-" + p.get_pinyin(
        classfield) + '.xls'
    book.save(os.getcwd() + "/" + path)
    return path

コード例 #18

0

ファイルを表示

class Fuzzname(object):
    """Fuzzy name lookup keyed on ``"<name> <pinyin of name>"`` strings."""

    def __init__(self):
        self.fuzzymap = {}
        self.pinyin = Pinyin()

    def _search_key(self, name):
        # The original text plus its pinyin, separated by a space.
        return name + " " + self.pinyin.get_pinyin(name, '')

    def fit(self, namelis):
        """Replace the lookup table with one built from ``namelis``."""
        self.fuzzymap = {}
        self.fuzzymap.update(
            (self._search_key(known), known) for known in namelis)

    def predict(self, name):
        """Return the fitted name whose search key best matches ``name``."""
        best = process.extractOne(self._search_key(name),
                                  self.fuzzymap.keys())
        return self.fuzzymap[best[0]]

コード例 #19

0

ファイルを表示

ファイル: cloudFloat.py プロジェクト: XingshengLiu/PythonTools

def generateSpell():
    """Read ``data.xlsx``, attach a pinyin spelling to each row, and write
    the first three columns to ``data_former.xlsx``.

    Each data row (the first row is skipped) becomes a ``Bean`` with
    ``mean``, ``speech`` and ``slice``.  ``bean.spell`` is set to the
    pinyin syllables of ``slice`` formatted like ``{"a","b"}``.

    Returns:
        list: the populated ``Bean`` objects.
    """
    beanList = []
    workbook = xlrd.open_workbook(os.path.join(os.getcwd(), 'data.xlsx'))
    sheet = workbook.sheets()[0]
    for row in range(1, sheet.nrows):
        bean = Bean()
        bean.mean = sheet.cell_value(row, 0).strip().replace('/', '-')
        bean.speech = sheet.cell_value(row, 1).strip()
        bean.slice = sheet.cell_value(row, 2).strip()
        beanList.append(bean)

    p = Pinyin()  # one converter is enough for all rows
    for bean in beanList:
        phonetices = p.get_pinyin(bean.slice, " ").split(" ")
        # Turn the list repr ['a', 'b'] into {"a","b"}.
        bean.spell = (str(phonetices)
                      .replace('[', '{')
                      .replace(']', '}')
                      .replace('\'', '\"')
                      .replace(' ', ''))

    out_book = xlsxwriter.Workbook(
        os.path.join(os.getcwd(), 'data_former.xlsx'))
    # Write to the sheet that was just created.  The original looked up
    # 'Sheet2', which does not exist, so the lookup returned None and
    # every write failed.
    ws = out_book.add_worksheet(u'Sheet1')
    # NOTE(review): looks like a debugging leftover - confirm before
    # removing.
    ws.write(1, 3, 'test')
    for i, bean in enumerate(beanList, start=1):
        ws.write(i, 0, bean.mean)
        ws.write(i, 1, bean.speech)
        ws.write(i, 2, bean.slice)
    out_book.close()
    return beanList

コード例 #20

0

ファイルを表示

ファイル: 3.weather.py プロジェクト: zyboy233/Learning.github.io

    def addWeather(self,cityList):
        """Fetch the weather for each city and store it in a per-city table.

        For every city the weather API is queried, a table named after the
        city's pinyin (hyphens removed) is created if needed, and one row
        (date, high, low) is inserted for yesterday and for each forecast
        entry.  Each statement is committed individually.
        """

        def insert_row(table, entry):
            # NOTE(review): the values are interpolated into the SQL text.
            # They come from a remote API; switch to the driver's
            # placeholders once its paramstyle is confirmed.
            self.cursor.execute('INSERT INTO {} VALUES ("{}","{}","{}")'.format(
                table, entry['date'], entry['high'], entry['low']))
            self.con.commit()

        p = Pinyin()
        for city in cityList:
            url = 'https://www.apiopen.top/weatherApi?city={}'.format(city)
            response = urlopen(quote(url,safe=string.printable))
            try:
                responseStr = response.read()
            finally:
                # Release the connection even when reading fails.
                response.close()
            responseJson = json.loads(responseStr)
            print(responseJson)

            table = p.get_pinyin(city).replace('-','')
            self.cursor.execute('create table if not exists {}(date text,high text,low text)'.format(table))
            self.con.commit()

            insert_row(table, responseJson['data']['yesterday'])
            for entry in responseJson['data']['forecast']:
                insert_row(table, entry)

コード例 #21

0

ファイルを表示

def airMysqlDataFransform():
    """Export air-quality data per monitoring point to CSV files.

    The distinct monitoring points are read from ``air_quality`` and saved
    to ``../data/placeItems.csv``.  For each non-null point the query
    built by ``handleAirDataSql`` (given the pinyin of the point) is run
    and its result is written to ``../airData/<pinyin>.csv``.
    """
    conn = pymysql.connect(host='127.0.0.1',
                           user='******', password='******',
                           db='testdata', charset='utf8',
                           use_unicode=True)
    try:
        # Output file for the list of places.
        placeItemsFile = '../data/placeItems.csv'
        # Query every place where air quality is monitored.
        placeSql = "SELECT DISTINCT(monitoring_point) from air_quality"
        placeItems = pd.read_sql(placeSql, con=conn)
        # File names use pinyin.
        p = Pinyin()
        # Store the list of monitored places.
        placeItems.to_csv(placeItemsFile)
        # Current time.
        print("Time:", datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        for indexs in placeItems.index:
            placeItem = placeItems.loc[indexs].values[0]
            # Skip null values.
            if placeItem is None:
                continue
            # Build the query against air_quality for this place.
            placeItem = p.get_pinyin(placeItem, '')
            airSql = handleAirDataSql(placeItem)
            # Run the query and convert the result.
            AQIData = pd.read_sql(airSql, con=conn)
            placeAir = "../airData/" + placeItem + ".csv"
            AQIData.to_csv(placeAir, index=False)
            print("placeAir to csv finished")
    finally:
        # The original never closed the connection.
        conn.close()

コード例 #22

0

ファイルを表示

def getGameLink(word, p=1):
    """Recursively collect Steam search-result links for ``word``.

    Page ``p`` of the search is fetched, its links are stored with
    ``add_eq`` under the pinyin of ``word`` (syllables joined by "_"), and
    after a random pause the next page is processed.  The recursion stops,
    without storing that page's links, when ``p`` equals the total page
    count.

    Returns:
        bool: ``True`` once the stop condition is reached.
    """

    pyinyin = Pinyin()
    eqKey = pyinyin.get_pinyin(word, "_")
    url = "https://store.steampowered.com/search/?term=" + word + "&page=" + str(
        p)
    print(url)

    if p == 1:
        # The page count is cached as an attribute on the function itself.
        # NOTE(review): it is computed only once per process, so a later
        # search for a different word reuses the first word's total -
        # confirm whether that is intended.
        if not hasattr(getGameLink, 'total'):
            getGameLink.total = getTotal(url)

    # The headers must be passed by keyword: the second positional
    # parameter of requests.get is `params`, so they were being sent as
    # query-string parameters.
    r = requests.get(url, headers=common.getHeaders())
    soup = BeautifulSoup(r.text, "lxml")

    if p == getGameLink.total:
        return True

    # Extract the links and store them.
    links = getApplinks(soup)
    sleep_sec = random.randint(1, 20)
    print("关键词：%s 共计 %d 页 | 当前采集第 %d页；采集链接数：%d | 休眠 %d 秒" %
          (word, getGameLink.total, p, len(links), sleep_sec))
    add_eq(eqKey, links)
    time.sleep(sleep_sec)
    # Propagate the result of the recursion instead of dropping it.
    return getGameLink(word, p + 1)

コード例 #23

0

ファイルを表示

def read_from_sentence_txt(start, emission, transition):
    """Accumulate HMM counts from ``SENTENCE_FILE`` into the given dicts.

    The sentence is the third tab-separated column of each line.  After
    removing ``_《》“”`` and one trailing punctuation mark, sentences shorter
    than two characters are skipped.

    Args:
        start: ``{first_char: count}``, updated in place.
        emission: ``{char: {pinyin: count}}``, updated in place.
        transition: ``{char: {next_char: count}}``, updated in place.

    Raises:
        IndexError: if a line has fewer than three tab-separated columns.
    """
    ## ./result/sentence.txt
    print('read from sentence.txt')
    converter = Pinyin()  # built once rather than once per line
    with codecs.open(SENTENCE_FILE, 'r', 'utf-8') as fin:
        for raw in fin:
            line = regex.sub(u"[_《》“”]", r"", raw.strip().split('\t')[2])
            # `line and` guards against an empty sentence, for which
            # line[-1] would raise IndexError.
            if line and line[-1] in ['，', '：', '？', '！', '。']:
                line = line[:-1]
            if len(line) < 2:
                continue
            ## for start
            start.setdefault(line[0], 0)
            start[line[0]] += 1

            ## for emission
            pnyns = converter.get_pinyin(line, " ").split()
            hanzis = [c for c in line]

            # NOTE(review): pairs characters with syllables positionally;
            # presumably every character yields exactly one token -
            # confirm for lines containing non-Chinese text.
            for hanzi, syllable in zip(hanzis, pnyns):
                emission.setdefault(hanzi, {})
                emission[hanzi].setdefault(syllable, 0)
                emission[hanzi][syllable] += 1

            ## for transition
            for f, t in zip(line[:-1], line[1:]):
                transition.setdefault(f, {})
                transition[f].setdefault(t, 0)
                transition[f][t] += 1

コード例 #24

0

ファイルを表示

ファイル: facts.py プロジェクト: yjy-1999/MyQuant

def fromCodeToName(factors, codes):
    """Return the pinyin of the ``name`` of every row of ``factors`` whose
    index value is in ``codes``.
    """
    # Select the matching rows.
    selected = factors[factors.index.isin(codes)].name.values
    # Convert the Chinese names to pinyin.
    converter = Pinyin()
    return list(map(converter.get_pinyin, selected))

コード例 #25

0

ファイルを表示

def main():
    """Split ``aqi.json`` into one CSV per city under ``aqi/<Province>/``.

    The JSON maps province -> city -> groups of values.  All values of a
    city are flattened and reshaped into rows of nine columns.  The
    province directory name is the capitalised pinyin of the province.
    A CSV that already exists is not overwritten.
    """
    P = Pinyin()
    with open('aqi.json', 'r', encoding='utf-8') as f:
        aqi = json.load(f)

    column_names = [
        'date', 'aqi', 'aqi_rank', 'pm25',
        'pm10', 'so2', 'no2', 'co', 'o3'
    ]
    for province, cities in aqi.items():
        print(province)
        prov_dir = Path.cwd() / 'aqi' / P.get_pinyin(province, '').capitalize()
        if not os.path.exists(prov_dir.as_posix()):
            os.makedirs(prov_dir.as_posix())

        for city_name, city_values in cities.items():
            print(city_name)
            flat = []
            for chunk in city_values.values():
                flat.extend(chunk)
            frame = pd.DataFrame(np.array(flat).reshape(-1, 9),
                                 columns=column_names).set_index('date')
            target = prov_dir / (city_name + '.csv')
            if os.path.exists(target.as_posix()):
                continue
            frame.to_csv(target)

コード例 #26

0

ファイルを表示

ファイル: facts.py プロジェクト: yjy-1999/MyQuant

def fromCNToEN(name):
    """Return a list with the pinyin of each element of ``name``.

    ``name`` is iterated element by element, so a plain string yields one
    entry per character.
    """
    # Convert the Chinese text to pinyin.
    converter = Pinyin()
    names = []
    for element in name:
        names.append(converter.get_pinyin(element))
    return names

コード例 #27

0

ファイルを表示

    def content_parse(self, response):
        """Parse a post page into a ``PostDetail`` item and index its tags.

        The post id is ``'T'`` plus the file stem of the last URL segment.
        Each tag is stored in ``db.Tags`` (keyed by its pinyin) if missing,
        and the post id is added to that tag's entry in ``db.Tagmap``.
        Content paragraphs are collected as image or text blocks.

        Yields:
            PostDetail: the populated item.

        This is Python 2 code (it relies on the ``unicode`` builtin).
        """
        item = PostDetail()
        last_segment = response.url.split('/')[-1]
        post_id = 'T' + last_segment.split('.')[0]
        item['_id'] = post_id

        spans = response.xpath(
            '//*[starts-with(@class, "details_01_l")]/span/text()').extract()
        # The author is the middle of exactly three spans; otherwise the
        # post is treated as anonymous.
        item['author'] = spans[1] if len(spans) == 3 else '佚名'

        tags = response.xpath(
            '//div[starts-with(@class, "lab_01 m_t_40")]/a/text()').extract()
        item['tag'] = tags
        for raw_tag in tags:
            encoded = raw_tag.encode('utf-8')
            tags_id = Pinyin().get_pinyin(unicode(encoded, "utf8"), '')
            tags_db = db.Tags
            tagmap_db = db.Tagmap
            if not tags_db.find_one({"_id": tags_id}):
                tags_db.insert({'_id': tags_id, 'tagname': encoded})
            if tagmap_db.find_one({"_id": tags_id}):
                tagmap_db.update({"_id": tags_id},
                                 {"$addToSet": {
                                     "post_id": post_id
                                 }})
            else:
                tagmap_db.insert({'_id': tags_id, 'post_id': [post_id]})

        paragraphs = response.xpath(
            '//*[starts-with(@class, "content_01 m_t_30 detasbmo")]/p')
        p_content = []
        for para in paragraphs:
            images = para.xpath('img/@src').extract()
            if images:
                block = {'type': 'img', 'body': images}
            else:
                block = {'type': 'text', 'body': para.xpath('text()').extract()}
            p_content.append(block)

        item['content'] = p_content
        item['comment'] = []  # comments, stored as a list
        item['coll'] = 'PostDetail'

        yield item

コード例 #28

0

ファイルを表示

def excel_to_mysql(ex_path, table_name):
    """Copy a spreadsheet into a MySQL table with romanised column names.

    The sheet at ``ex_path`` is read with pandas, every column header is
    converted to pinyin (no separator), a ``time`` column holding the
    current timestamp is appended, and the result is written to
    ``table_name``.  ``if_exists='replace'`` drops and recreates an
    existing table; the DataFrame index is stored as the ``id`` column.
    """
    engine = create_engine('mysql+pymysql://root:111111@localhost:3306/oceans')
    sheet = pd.read_excel(ex_path, encoding="utf-8", sep="\t")

    from xpinyin import Pinyin
    converter = Pinyin()
    # get_pinyin's default splitter is `-`; pass "" to join the syllables.
    sheet.columns = [
        converter.get_pinyin(header, "") for header in sheet.columns.tolist()
    ]

    stamp = datetime.datetime.now().strftime('%Y/%m/%d %H:%M')
    stamp_column = pd.DataFrame({'time': [stamp] * sheet.shape[0]})

    combined = pd.concat([sheet, stamp_column], axis=1)
    print(combined)
    combined.to_sql(
        table_name,
        con=engine,
        index=True,
        index_label='id',
        if_exists='replace',
    )

コード例 #29

0

ファイルを表示

ファイル: citys.py プロジェクト: zhangchunsheng/webapp

def insertData():
	"""Rebuild the ``weather_citys`` table from ``citys.txt``.

	Each line of the file is split on ``=`` into a city code and a city
	name; the name is romanised with pinyin and one row is inserted (and
	committed) per line.  On a database or I/O error the message is
	printed and the process exits with status 1.
	"""
	print("insertData")
	try:
		source = open("citys.txt", "r")

		cursor = conn.cursor()

		# Empty the table, then force utf8 on the connection.
		for statement in ("truncate table weather_citys",
				"SET NAMES utf8",
				"SET CHARACTER_SET_CLIENT=utf8",
				"SET CHARACTER_SET_RESULTS=utf8"):
			cursor.execute(statement)
		conn.commit()

		entries = source.readlines()
		converter = Pinyin()
		date = int(time.mktime(datetime.datetime.now().timetuple()))
		bz = 1
		insert_template = "insert into weather_citys(cityCode,cityName,spellName,date,bz) values ('%s','%s','%s','%s','%s')"
		for entry in entries:
			fields = entry.split("=")
			cityCode = fields[0]
			cityName = fields[1]
			spellName = converter.get_pinyin(cityName.decode("utf-8"), '')
			cursor.execute(insert_template % (cityCode, cityName, spellName.encode("utf-8"), date, bz))
			conn.commit()

		source.close()
		cursor.close()
		conn.close()
	except (mdb.Error, IOError) as e:
		print("Error %d: %s" % (e.args[0], e.args[1]))
		sys.exit(1)

コード例 #30

0

ファイルを表示

ファイル: load_term.py プロジェクト: you-n-g/ir362web

def _load_term_to_db(fileName):
    p = Pinyin()
    count = 0
    with open(fileName) as fp:
        for line in fp:
            print count
            count += 1
            if(count < 3225):
                continue
            try:
                line = line.decode('gbk')
            except Exception, err:
                print line, err
                continue
            terms = line.split()
            for term in terms:
                if len(term) <= 1:
                    continue
                pinyin = p.get_pinyin(term, "")
                cond = {"_id": term}
                if db.get_term(cond):
                    to = {"$inc": {"accFrequence": 1}}
                    db.update_term(cond, to)
                else:
                    data = {"_id": term,
                            "pinyin": pinyin,
                            "accFrequence": 1,
                            "queryFrequence": 0}
                    db.insert_term(data)

コード例 #31

0

ファイルを表示

def Change_ToPinYin(work):
    """Return the pinyin of ``work`` joined without a separator.

    Any space left in the converted text is replaced by an underscore.
    """
    romanised = Pinyin().get_pinyin(u"" + work, '')
    return romanised.replace(' ', '_')

コード例 #32

0

ファイルを表示

    def moveButton(self, button_name="成都市"):
        """React to a city button press: restyle the button and refresh the views.

        The city name is romanised (hyphens removed) and used to look up
        the button's index in ``ui.City_Dict``.  The button is moved and
        restyled, the old/new press bookkeeping in ``self.button_flag``
        is updated, and the weather, air and HTML views are refreshed.
        """
        city_name = Pinyin().get_pinyin(button_name).replace('-', '')

        # ui.City_Dict[city_name] is the index number of the city's button.
        slot = ui.City_Dict[city_name]
        button = ui.City_Button[slot]
        button.move(2 + 180 * slot, 6)
        button.setStyleSheet(
            "border-radius:10px;"
            "text-align:bottom;"
            "padding-bottom:2px;"
            f"background-image:url(./city_pictures/{city_name}.png);"
            "color: rgb(97, 101, 247);font:Bold 12.5pt '等线';")

        # The previously pressed button becomes the "old" one so it can be
        # restored when a different button is pressed (acts like a toggle).
        self.button_flag["old"] = self.button_flag["new"]
        pressed_key = "{}_{}".format(city_name, ui.City_Dict[city_name])
        # Pressing the same button again must not trigger a restore.
        if self.button_flag["old"] != pressed_key:
            self.Reduction(self.button_flag)
        self.button_flag["old"] = self.button_flag["new"]
        self.button_flag["new"] = "{}_{}".format(
            city_name, ui.City_Dict[city_name])

        # Fetch and lay out the weather/temperature information.
        self.weatherInfo(button_name)

        # Let the other widgets respond to the city change.
        self.airButton()

        # Generate the new page and refresh it in the UI.
        self.createHtml(button_name)

コード例 #33

0

ファイルを表示

def getFullPin():
    """Convert every name in ``cnnames`` to full pinyin and collect the results.

    Each stripped name is printed (re-encoded from UTF-8 to GBK) next to
    its pinyin, and the pinyin is added to ``username``.
    """
    print(u'[-]姓名转全拼')
    converter = Pinyin()
    for raw_name in cnnames:
        stripped = raw_name.strip()
        py = converter.get_pinyin(stripped, '')
        print('{}---{}'.format(stripped.decode('utf-8').encode('gbk'), py))
        username.add(py)

コード例 #34

0

ファイルを表示

def main():
    """Crawl the insurance listings of every province file in ``./各省HTML``.

    The province name is the file name up to the first dot; its pinyin
    (hyphens removed) forms the sub-domain of the listing URL.  Every URL
    returned by ``get_urls`` is parsed and saved, with a 0.2 s pause
    before each request.  A failure on one URL is logged and the loop
    moves on.
    """
    for file in os.listdir("./各省HTML"):
        provice_file_path = os.path.join(
            r"C:\Users\my\Desktop\SPIDER\20_保险公司\各省HTML", file)
        name = file.split(".")[0]
        subdomain = Pinyin().get_pinyin(name).replace("-", '')
        prefix_url = "http://{}.circ.gov.cn/tabid".format(subdomain)
        urls = get_urls(prefix_url, provice_file_path)
        total = len(urls)
        for position, url in enumerate(urls, 1):
            time.sleep(0.2)
            print(position, total, url)
            try:
                insurance_info = parse_url(url)
                insurance_info["Province"] = name
                save_to_db(insurance_info)
            except Exception as e:
                logger.info(url)
                logger.info(e)

コード例 #35

0

ファイルを表示

ファイル: views.py プロジェクト: hhr66/mls

def editnode(request):
    """Rename a tree node and store the pinyin of its new Chinese name.

    Redirects to ``/login`` when the session has no ``name`` key.  The
    node id comes from the ``treeid`` query parameter and the new name
    from ``name``.  Responds with the plain text ``ok``.
    """
    if "name" not in request.session:
        return HttpResponseRedirect('/login')
    # Unused below, but the lookup is kept: it raises when the session
    # carries no "email" entry, exactly as before.
    username = request.session["email"].split('@')[0]
    query = request.GET
    treeid = query.get('treeid')
    zh_name = query.get('name')
    en_name = Pinyin().get_pinyin(zh_name, '')
    Mtree.objects.filter(id=int(treeid)).update(
        zh_name=zh_name, en_name=en_name)
    return HttpResponse('ok')

コード例 #36

0

ファイルを表示

ファイル: py2_adjusted_cutterDictionary2shortWord.py プロジェクト: 2877992943/add_match_scrawl_project

 def word2pinyin(word):
     """Return the pinyin of ``word`` with the syllables run together.

     Byte strings are decoded as UTF-8 first.  Example from the original
     author: the syllables 'shou bao zhuang' are returned as
     'shoubaozhuang'.
     """
     from xpinyin import Pinyin
     converter = Pinyin()
     if not isinstance(word, unicode):
         word = unicode(word, 'utf-8')
     spaced = converter.get_pinyin(word, ' ')
     return spaced.replace(' ', '')

コード例 #37

0

ファイルを表示

ファイル: identify_city.py プロジェクト: CollComm/Tuan

	def load(self, lashou_city_list):
		"""Parse the city XML and return ``[id, name, pinyin]`` for each city.

		``lashou_city_list`` is handed to ``ET.parse``; every ``city``
		element contributes the text of its ``id`` and ``name`` children
		plus the pinyin of the name (default separator).
		"""
		converter = Pinyin()
		root = ET.parse(lashou_city_list).getroot()
		id_name_pairs = (
			(node.find('id').text, node.find('name').text)
			for node in root.iter('city')
		)
		return [
			[city_id, city_name, converter.get_pinyin(city_name)]
			for city_id, city_name in id_name_pairs
		]

コード例 #38

0

ファイルを表示

ファイル: views.py プロジェクト: TeaWhen/yanshen

def welcome(request):
    """Handle the welcome page: log in or register a user.

    Authenticated users are redirected to '/'.  On POST the ``action``
    field selects ``login`` or ``reg``.  Every non-redirect exit returns
    ``locals()``, so each local variable name in this function is part of
    what the caller receives (presumably a template context built by a
    decorator -- confirm).  Do not rename or remove locals casually.

    NOTE(review): a POST whose ``action`` is neither 'login' nor 'reg'
    reaches the end of the function and returns None.
    """
    if request.user.is_authenticated():
        return redirect('/')
    appname = u"延伸"
    pagename ='welcome'
    if request.method == 'POST':
        username = request.POST['username']
        password = request.POST['password']
        action = request.POST['action']
        # All three fields are required; the same failure message is used
        # regardless of which action was requested.
        if username == '' or password =='' or action == '':
            message = '抱歉，服务器开小差了，注册失败。'
            return locals()
        if action == 'login':
            user = authenticate(username=username, password=password)
            if user is not None:
                if user.is_active:
                    login(request, user)
                    return redirect('/')
                else:
                    # Account exists but has been deactivated.
                    message = '账户已被注销。'
                    return locals()
            else:
                # Wrong e-mail or password.
                message = 'Email 或密码错误。'
                return locals()
        elif action == 'reg':
            # The submitted username is used as the account e-mail.
            if Profile.objects.filter(email=username).exists():
                message = '您已经注册过了。'
                return locals()
            else:
                try:
                    user = Profile.objects.create_user(email=username, password=password)
                    user.first_name = request.POST['firstname']
                    user.last_name = request.POST['lastname']
                    p = Pinyin()
                    # Space-separated pinyin of the full name (last name
                    # first), then the initials of each character appended
                    # directly with no separator.
                    user.pinyin = p.get_pinyin(user.last_name + user.first_name, ' ')
                    for char in user.last_name + user.first_name:
                        user.pinyin += p.get_initials(char)
                    # Seed the contact info with the account e-mail as entry 1.
                    user.contact_info = json.JSONEncoder().encode({"next_id":2, "data":[{"info_id":1, "type":"Email", "key": u"电子邮箱", "value": user.email}]})
                    user.save()
                    # Every new user starts with one default category.
                    category = Category.objects.create(name=u'未分组', owner=user, privilege='{"1":false}')
                    category.save()
                except ValidationError:
                    message = '请输入正确的 Email 地址。'
                    return locals()
                if user:
                    # Log the freshly registered user in straight away.
                    user = authenticate(username=username, password=password)
                    login(request, user)
                    return redirect('/me/?first=1')
                else:
                    message = '抱歉，服务器开小差了，注册失败。'
                    return locals()
    else:
        return locals()

コード例 #39

0

ファイルを表示

ファイル: identify_city.py プロジェクト: CollComm/Tuan

	def construct_city_set(self, lashou_city_list, unlisted):
		result = []
		p = Pinyin()
		tree = ET.parse(lashou_city_list)
		root = tree.getroot()
		for city in root.iter('city'):
			city_name = city.find('name').text
			result.append( [city_name, p.get_pinyin(city_name)] )

		fhandler = open(unlisted, "r")
		lines = fhandler.readlines()
		fhandler.close()
		for line in lines:
			city_name = line.strip()
			city_name = unicode( city_name, "utf-8" )
			result.append( [city_name, p.get_pinyin(city_name)] )

		city_index = 1
		for city in result:
			print city_index, "\t", city[0], "\t", city[1]
			city_index = city_index + 1

コード例 #40

0

ファイルを表示

ファイル: event.py プロジェクト: zy4290/qilaihi.club

def query_fulltext_code(code):
    """Full-text search events by the pinyin of ``code``.

    The search string is the space-separated pinyin of ``code`` followed
    by each alphabetic initial repeated four times.  It is matched
    against the ``codepinyin`` and ``codepinyininitials`` columns and at
    most four rows are returned as dicts, each with a ``similarity``
    column.
    """
    pinyin = Pinyin()
    letters = pinyin.get_initials(code, splitter=' ').lower().split(' ')
    initials = [letter * 4 for letter in letters if letter.isalpha()]
    logging.debug(initials)
    analysed_code = pinyin.get_pinyin(code, splitter=u' ') + ' ' + ' '.join(initials)
    logging.debug(analysed_code)

    clause = "MATCH(`codepinyin`, `codepinyininitials`) AGAINST (%s)"
    selection = Event.select(SQL('*, ' + clause + ' AS similarity', analysed_code))
    limited = selection.where(SQL(clause, analysed_code)).limit(4)
    query = yield dbutil.do(limited.dicts)
    events = list(query)
    logging.debug(events)
    return events

コード例 #41

0

ファイルを表示

ファイル: util.py プロジェクト: lutris/website

def slugify(text):
    """Version of slugify that supports Japanese and Chinese characters.

    Falls back, in order, from the plain Django slug to romaji
    (Japanese), pinyin (Chinese) and finally a generic transliteration
    (Cyrillic, Greek and other alphabets).  The first non-empty slug
    wins and is cut to 50 characters.  Empty input yields "".
    """
    if not text:
        return ""
    # Each candidate is built lazily so a later transliterator only runs
    # when every earlier attempt produced an empty slug.
    candidates = (
        lambda: text,
        lambda: romkan.to_roma(text),
        lambda: Pinyin().get_pinyin(text),
        lambda: translit(text, reversed=True),
    )
    slug = ""
    for make_source in candidates:
        slug = django_slugify(make_source())
        if slug:
            break
    return slug[:50]

コード例 #42

0

ファイルを表示

ファイル: eve_mod.py プロジェクト: zxc111/qqbot-py

 def translation_cn_to_en(self, cn):
     """Look up a star system by its Chinese name.

     The first run of CJK / alphanumeric / ``/`` / ``-`` characters in
     ``cn`` is converted to pinyin and used as a prefix match against the
     ``pinyin`` column of the ``map`` table.

     Returns ``(en, cn, number)`` taken from columns 1, 2 and 0 of the
     first matching row, with spaces in the English name replaced by
     underscores.  Returns ``(False, message, "")`` when the input holds
     no usable characters or no row matches.
     """
     p = Pinyin()
     only = re.compile(u"[\u4e00-\u9fa5a-z0-9A-Z/-]+")
     matches = only.findall(cn)
     if not matches:
         # Previously this case raised IndexError on cn[0].
         return False, u"%s 星系不存在" % cn, ""
     name = matches[0]
     pinyin = p.get_pinyin(name, '')
     # NOTE(review): the query is built by string interpolation.  The
     # character whitelist above keeps quotes out of the value, but a
     # parameterized query would be safer once the driver's paramstyle
     # is confirmed.
     self.cur.execute("select * from map where pinyin like '%s%%'" % pinyin)
     data = self.cur.fetchall()
     print(data)
     if len(data) == 0:
         return False, u"%s 星系不存在" % name, ""
     row = data[0]
     # The old code passed data.count(" ") as the replace limit; `data`
     # is the row tuple, so the limit was 0 and no space was replaced.
     en = row[1].strip(" ").replace(" ", "_")
     cn = row[2].strip(" ")
     number = row[0]
     return en, cn, number

コード例 #43

0

ファイルを表示

ファイル: backend.py プロジェクト: everpcpc/bgmcli

    def __init__(self, email, password):
        """Open a Bangumi session and index the collection titles.

        For every collection whose subject has a Chinese title, the
        pinyin of that title (no separator) is added to the subject's
        ``aliases`` and registered in ``corrections`` as mapping back to
        the Chinese title.
        """
        self._session = BangumiSession(email, password)
        self._colls = self._session.get_dummy_collections('anime', 3)
        # Add pinyin to the valid titles and set up auto-correction.
        converter = Pinyin()
        for coll in self._colls:
            subject = coll.subject
            if not subject.ch_title:
                continue
            pinyin_title = converter.get_pinyin(subject.ch_title, '')
            aliases = subject.other_info.get('aliases')
            if aliases:
                aliases.append(pinyin_title)
            else:
                subject.other_info['aliases'] = [pinyin_title]
            corrections.update({pinyin_title: subject.ch_title})

        self._titles = set()
        self._update_titles()

コード例 #44

0

ファイルを表示

ファイル: populate.py プロジェクト: toway/mba

def create_univs():
    """Populate the ``Univs`` table from ``univs.txt`` when it is empty.

    Each line of the UTF-8 file next to this module is one university
    name.  For every name the full pinyin (``pinyin``) and the string of
    syllable initials (``pprev``) are stored.  Nothing happens when the
    table already has rows.
    """
    if DBSession.query(Univs).count() != 0:
        return
    p = Pinyin()
    univs_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "univs.txt")
    cc = '#'
    with codecs.open(univs_path, "r", "utf8") as unvis_file:
        for line in unvis_file:
            # Strip only the line terminator.  Slicing off the last
            # character unconditionally truncated a final line without a
            # newline and left a stray '\r' on CRLF files.
            line = line.rstrip(u'\r\n')
            py = p.get_pinyin(line, cc)
            p1 = ""
            p2 = ""
            # NOTE(review): a name that yields a single pinyin token has
            # no separator, so both fields stay empty -- confirm that
            # this is intended.
            if cc in py:
                p2 = ''.join([c[0] for c in py.split(cc)])
                p1 = py.replace(cc, '')
            u = Univs(name=line, pinyin=p1, pprev=p2)
            DBSession.add(u)
    DBSession.flush()

コード例 #45

0

ファイルを表示

ファイル: homework1.py プロジェクト: yoyo-C/python_code

def get_name(input_file, output_file):
    # get chinese name and transform into Pinyin
    print "Please enter your name in Chinese!"
    name_in_chinese = raw_input("> ").decode(sys.stdin.encoding or locale.getpreferredencoding(True))
    p = Pinyin()  # create instance of Pinyin class
    name_in_pinyin = p.get_pinyin(name_in_chinese, '') # translate chinese into Pinyin and get rid of '-'

    input_fd = open(input_file) 
    input_fd.readline() # open file and read the first line

    lessons = [u"框架原理","Python", u"用例编写","Keywordsrunner", "Selenium"]
    total_lessons = len(lessons)
    line_num = 1  # count the row you are in

    while True:
        lesson_record = input_fd.readline().strip()
        lesson_record_list = lesson_record.split(',')
        line_num += 1

        if lesson_record == '':
            print "Cannot find your name."
            input_fd.close()
            return

        else:
            if lesson_record_list[1] == name_in_pinyin:
                print u"%s在第%d行，报名课程有:" % (name_in_chinese, line_num),
                context = u"%s在第%d行，报名课程有：" % (name_in_chinese, line_num)

                count = 0
                for i in range(total_lessons):
                    # Lesson fields started from 3rd column
                    if lesson_record_list[i + 2] == '1':
                        count += 1
                        print "%s," % lessons[i],
                        context = context + "%s, " % lessons[i]

                print u"总共%d课." % count
                context = context + u"总共%d课." % count
                input_fd.close()

                output_fd = open(output_file, 'wb')
                output_fd.write(context.encode('gbk'))
                output_fd.close()
                return

コード例 #46

0

ファイルを表示

ファイル: enhance_pandas.py プロジェクト: apetitdev/python-tools

 def get_pinyin(self, df, series, inplace=True):
     """
     Romanise Chinese text columns of a DataFrame.

     :param df: <DF> Pandas DataFrame
     :param series: <LIST<STRING>> names of the columns to romanise;
      columns whose dtype is not ``object`` are skipped
     :param inplace: <BOOLEAN> when True the romanised text overwrites the
      column, when False it is stored in a new ``<name>_translate`` column
     :return: <DF> a new Pandas DataFrame built from the updated records
     """
     records = df.to_dict('records')
     converter = Pinyin()
     for serie in series:
         if df[serie].dtypes != object:
             continue
         for record in records:
             target = serie + "_translate" if not inplace else serie
             record[target] = converter.get_pinyin(record[serie], '')
     return pd.DataFrame(records)

コード例 #47

0

ファイルを表示

ファイル: homepage.py プロジェクト: luckistmaomao/webmaster

def people(filename):
    """Print HTML table cells linking each person's name to a mail handle.

    ``filename`` is a UTF-8 file with one name per line; blank lines are
    ignored.  The handle is the pinyin of the first character followed by
    the initial of every later syllable.  Two-character names are padded
    with non-breaking spaces.  After every third cell, except at the very
    end, a blank line and a spacer cell are printed.
    """
    space = u"&nbsp;&nbsp;&nbsp;&nbsp;"
    with codecs.open(filename, encoding="utf-8") as infile:
        # A blank line carries no name; it used to reach the formatting
        # code with pinyin_abbrev unbound or left over from the previous
        # row.
        names = [name for name in (line.strip() for line in infile) if name]
    td_format = u"<td width=\"172\">{0}（<a href=\"javascript:toto('{1}')\"><img src=\"img/email.gif\" /></a>）</td>"
    p = Pinyin()  # one converter is enough for the whole list
    for index, name in enumerate(names):
        pinyin = p.get_pinyin(name, " ").split()
        # The same rule covers every length, including single-character
        # names that previously had no branch at all.
        pinyin_abbrev = pinyin[0] + "".join(item[0] for item in pinyin[1:])
        if len(name) == 2:
            name = name[0] + space + name[1]
        print(td_format.format(name, pinyin_abbrev))
        if (index + 1) % 3 == 0 and index < len(names) - 1:
            print("")
            print("<td width=\"98\">&nbsp;</td>")

コード例 #48

0

ファイルを表示

ファイル: pm2_5.py プロジェクト: zhangchunsheng/webapp

def getCities():
	'''Reload the ``pm2_5_cities`` table from ``cities.json``.

	The table is truncated, then one row per city name is inserted and
	committed; the same INSERT statements are also written to
	``insert_cities.sql``.  Prints "no data" and returns early when the
	JSON payload is a dict containing an "error" key.
	'''
	cursor = conn.cursor()

	tableName = "pm2_5_cities"

	for statement in ("truncate table %s" % tableName,
			"SET NAMES utf8",
			"SET CHARACTER_SET_CLIENT=utf8",
			"SET CHARACTER_SET_RESULTS=utf8"):
		cursor.execute(statement)
	conn.commit()

	file = codecs.open("insert_cities.sql", "w", 'utf-8')

	jsons = readJson("cities.json")
	if type(jsons) == DictionaryType and jsons.has_key("error"):
		print("no data")
		return
	cities = jsons["cities"]

	p = Pinyin()
	columns = "cityCode,cityName,spellName,`date`,bz"

	for cityName in cities:
		cityCode = ""
		spellName = p.get_pinyin(cityName, '')
		date = int(time.time())
		bz = 1

		values = "'%s','%s','%s',%d,%d" % (cityCode, cityName, spellName, date, bz)
		sql = "insert into %s(%s) values (%s)" % (tableName, columns, values)
		# The dump file gets the same statement, terminated with ';'.
		file.write(sql + ";\n")
		cursor.execute(sql.encode("utf-8"))
		conn.commit()

	file.close()
	cursor.close()
	conn.close()

コード例 #49

0

ファイルを表示

ファイル: build_corpus.py プロジェクト: bo1yuan/Somiao-Pinyin

def align(sent):
    '''
    Align a Chinese sentence with its pinyin, character by character.

    Args:
      sent: A string. A sentence.

    Returns:
      A tuple of (pinyin, hanzi) strings of equal length: the pinyin
      syllables run together, and the sentence with each character
      followed by "_" padding up to the length of its syllable.
    '''
    syllables = Pinyin().get_pinyin(sent, " ").split()

    padded_chars = [
        char + "_" * (len(syllable) - 1)
        for char, syllable in zip(sent.replace(" ", ""), syllables)
    ]

    pnyns = "".join(syllables)
    hanzis = "".join(padded_chars)

    assert len(pnyns) == len(hanzis), "The hanzis and the pinyins must be the same in length."
    return pnyns, hanzis

コード例 #50

0

ファイルを表示

ファイル: views.py プロジェクト: hhr66/mls

def addnode(request):
    """Create a child node under ``pid`` and answer with its id as JSON.

    Redirects to ``/login`` when the session has no ``name`` key.  The
    new node is one level deeper than its parent; its ``gen`` is the
    parent's ``gen`` plus ``_<new id>``.  The JSON reply holds ``id`` and
    ``showid`` (1 when the new node is at depth 4, otherwise 2).
    """
    if "name" not in request.session:
        return HttpResponseRedirect('/login')
    # Unused below, but the lookup is kept: it raises when the session
    # carries no "email" entry, exactly as before.
    username = request.session["email"].split('@')[0]
    zh_name = request.GET.get('zh_name')
    pid = request.GET.get('pid')
    en_name = Pinyin().get_pinyin(zh_name, '')

    parent = Mtree.objects.get(id=pid)
    parent_gen = parent.gen
    child_depth = parent.deep + 1

    created = Mtree.objects.create(
        pid=pid, deep=child_depth, zh_name=zh_name, en_name=en_name)
    node_id = created.id
    Mtree.objects.filter(id=node_id).update(
        gen=parent_gen + '_' + str(node_id))

    showid = 1 if child_depth == 4 else 2
    return HttpResponse(json.dumps({'id': node_id, 'showid': showid}))

コード例 #51

0

ファイルを表示

ファイル: eventservice.py プロジェクト: zy4290/qilaihi.club

 def post(self):
     """Create an event from the JSON request body.

     The event's code is stored both as space-separated pinyin and as a
     space-separated list of its alphabetic initials, each repeated four
     times.  After saving, a success response is written and QR-code
     creation is scheduled on the IO loop.  Any exception results in a
     failure response and a logged traceback.
     """
     try:
         data = json.loads(self.request.body.decode())
         logging.debug(data)
         event = dict_to_model(Event, data)
         event.createtime = datetime.datetime.now()

         converter = Pinyin()
         event.codepinyin = converter.get_pinyin(event.code, splitter=' ')
         letters = converter.get_initials(event.code, splitter=' ').lower().split(' ')
         event.codepinyininitials = ' '.join(
             [letter * 4 for letter in letters if letter.isalpha()])

         yield dbutil.do(event.save)
         reply = Response(
             status=1, msg='恭喜你，活动发布成功！',
             result={}
         )
         self.write(reply.json())
         ioloop.IOLoop.current().spawn_callback(event_service.create_qrcode, event)
     except Exception as e:
         self.write(Response(msg='sorry，亲，活动发布失败').json())
         logging.exception('CreateEventHandler error: {0}'.format(str(e)))

コード例 #52

0

ファイルを表示

ファイル: fields.py プロジェクト: bobohb/django-autoslug

    def pre_save(self, instance, add):
        """Compute the slug for ``instance``, store it on the field and return it.

        The source text is the current field value, or the prepopulated
        value when ``always_update`` is set or the field is empty and
        ``populate_from`` is configured.  The text is converted to pinyin
        instead of being slugified; an empty result falls back to the
        model's ``module_name``.  The slug is then cropped and, when
        uniqueness is required, made unique.
        """
        # Start from the slug currently entered on the instance.
        source_text = self.value_from_object(instance)

        should_populate = self.always_update or (
            self.populate_from and not source_text)
        if should_populate:
            source_text = utils.get_prepopulated_value(self, instance)

            if __debug__ and not source_text:
                print('Failed to populate slug %s.%s from %s' %
                      (instance._meta.object_name, self.name, self.populate_from))

        # Chinese characters are converted to letters in place of the
        # regular self.slugify(value) call.
        from xpinyin import Pinyin

        # No incoming value: use the model name.
        slug = Pinyin().get_pinyin(source_text) or instance._meta.module_name

        assert slug, 'slug is defined before trying to ensure uniqueness'

        slug = utils.crop_slug(self, slug)

        # Ensure the slug is unique (if required).
        if self.unique or self.unique_with:
            slug = utils.generate_unique_slug(self, instance, slug)

        assert slug, 'value is filled before saving'

        # Make the updated slug available as an instance attribute.
        setattr(instance, self.name, slug)

        return slug

コード例 #53

0

ファイルを表示

ファイル: data.py プロジェクト: Bankq/nomen

    def parse(self):
        """Read the Chinese/English word pairs from ``self.filepath``.

        Every line must hold exactly two whitespace-separated fields: the
        Chinese word and its English counterpart.  For each pair the
        Chinese word, its pinyin syllables, the lower-cased English word
        and its syllables are appended to ``self.ch``, ``self.pinyin``,
        ``self.en`` and ``self.syllables``.  ``self.count`` tracks the
        line number.

        Raises:
            ValueError: when a line does not contain exactly two fields.
        """
        p = Pinyin()
        s = Hyphenator('en_US')
        with codecs.open(self.filepath, encoding='utf-8', mode='r') as f:
            for line in f:
                self.count = self.count + 1
                # split() already discards the line terminator.  The old
                # unconditional line[0:-1] cut the last real character
                # off a final line that has no trailing newline.
                words = line.split()
                if len(words) != 2:
                    print("Error on line %s" % self.count)
                    raise ValueError(
                        "line %s: expected 2 whitespace-separated fields, "
                        "got %d" % (self.count, len(words)))
                c = words[0]
                e = words[1].lower()

                self.ch.append(c)
                self.pinyin.append(p.get_pinyin(c, ' ').split())

                self.en.append(e)
                # Words of three letters or fewer are kept as a single
                # syllable; longer ones go through the hyphenator.
                if len(e) > 3:
                    syll = s.syllables(e)
                    syll = self.sub_syllables(e, c, syll)
                else:
                    syll = [e]
                self.syllables.append(syll)

コード例 #54

0

ファイルを表示

ファイル: p.py プロジェクト: cringha/mobility-tools

from xpinyin import Pinyin

# Build the converter once; it is reused for the call below.
p = Pinyin()
# Romanise the sample word using get_pinyin's default separator.
v = p.get_pinyin("和面")

# Show the result on stdout.
print(v)

コード例 #55

0

ファイルを表示

ファイル: pinyin.py プロジェクト: hupili/utility

# -*- coding: utf-8 -*-

from xpinyin import Pinyin
import sys
import re

p = Pinyin()

# Filter stdin: each line is one UTF-8 word.  Words containing a space or
# a tab are dropped; the rest are written as "<word>\t<pinyin>\t1" when
# the space-separated pinyin consists only of lowercase letters and
# spaces.
for line in sys.stdin:
    word = line.strip().decode('utf-8')
    if ' ' in word or '\t' in word:
        continue
    word_pinyin = p.get_pinyin(word, u' ')
    if re.match(r'^[a-z ]+$', word_pinyin):
        sys.stdout.write(('%s\t%s\t1\n' % (word, word_pinyin)).encode('utf-8'))

コード例 #56

0

ファイルを表示

ファイル: pinyin.py プロジェクト: ludonghai715/ludonghai

#先下载xpinyin模块, http://pypi.python.org/pypi/xpinyin/

from xlrd import open_workbook, cellname
from xlutils.copy import copy
from xpinyin import Pinyin

# Open the source workbook and take its first sheet for reading.
rb = open_workbook('hanzi.xls') 
rs = rb.sheet_by_index(0)
# Work on a writable copy of the workbook; ws is its first sheet.
wb = copy(rb)
ws = wb.get_sheet(0)
p = Pinyin()
# For every row, read column 0 and write its pinyin (default separator)
# into column 1 of the copy.
for x in range(rs.nrows):
    hanzi= rs.cell(x, 0).value
    pinyin = p.get_pinyin(hanzi) 
    ws.write(x, 1, pinyin)

# Save the annotated copy under a new file name.
wb.save('pinyin.xls')
# Wait for input before exiting (the prompt is read with raw_input).
raw_input("press any key to exit")

コード例 #57

0

ファイルを表示

ファイル: script.py プロジェクト: drinkupthewine/Liyuwiki

def term_pinyin(word):
    """Return the space-separated pinyin of ``word``.

    The word is first passed through ``digi_to_py``.
    """
    prepared = digi_to_py(word)
    return Pinyin().get_pinyin(prepared, ' ')

コード例 #58

0

ファイルを表示

ファイル: citys.py プロジェクト: zhangchunsheng/didiwuliu_server

def insertData():
	# Rebuild the dict_citys table from citys.txt.
	#
	# The hierarchy level of each line is derived from the number of space
	# characters it contains: 4 -> province, 6 -> city, 8 -> county.  The
	# same run of spaces is then used to split the line into code and name.
	# One row is inserted and committed per line.  On a database or I/O
	# error the message is printed and the process exits with status 1.
	print "insertData";
	try:
		file = open("citys.txt", "r");# modes: w, a, wb (binary)
		
		cursor = conn.cursor();
		
		# Empty the table, then force utf8 on the connection.
		sql = "truncate table dict_citys";
		cursor.execute(sql);
		cursor.execute("SET NAMES utf8");
		cursor.execute("SET CHARACTER_SET_CLIENT=utf8");
		cursor.execute("SET CHARACTER_SET_RESULTS=utf8");
		conn.commit();
		
		fileList = file.readlines();
		p = Pinyin();
		date = int(time.mktime(datetime.datetime.now().timetuple()));
		bz = 1;
		
		cityCode = "";
		cityName = "";
		spellName = "";
		level = 1;#1 - province 2 - city 3 - county
		customCode = "";
		parentCode = "";
		parentId = "";
		
		# State carried from one line to the next.
		spaceCount = 0;
		space = " ";
		lastSpaceCount = 0;
		provinceIndexCount = 0;
		cityIndexCount = 0;
		countyIndexCount = 0;
		provinceCode = "";
		customCityCode = "";
		
		for fileLine in fileList:
			#print fileLine;
			# Counts every space in the line, not only the separator run.
			spaceCount = fileLine.count(" ");
			if(spaceCount == 4):
				# Province: 3-digit running number, no parent.
				provinceIndexCount += 1;
				parentCode = "0";
				parentId = 0;
				customCode = str(provinceIndexCount).zfill(3);
				provinceCode = customCode;
				space = "    ";
				level = 1;
			elif(spaceCount == 6):
				# City: numbering restarts right after a province line.
				if(lastSpaceCount == 4):
					cityIndexCount = 0;
				
				cityIndexCount += 1;
				parentCode = provinceCode;
				customCode = provinceCode + str(cityIndexCount).zfill(3);
				customCityCode = customCode;
				space = "      ";
				level = 2;
			elif(spaceCount == 8):
				# County: numbering restarts right after a city line.
				if(lastSpaceCount == 6):
					countyIndexCount = 0;
				
				countyIndexCount += 1;
				parentCode = customCityCode;
				customCode = customCityCode + str(countyIndexCount).zfill(3);
				space = "        ";
				level = 3;
			# Any other space count falls through with the values left
			# over from the previous line.
			
			cityInfo = fileLine.split(space);
			cityCode = cityInfo[0];
			cityName = cityInfo[1];
			
			spellName = p.get_pinyin(cityName.decode("utf-8"), '');
			# NOTE(review): `level` is computed above but is not part of
			# the INSERT column list.
			sql = "insert into dict_citys(cityCode,cityName,spellName,customCode,parentCode,parentId,date,bz) values ('%s','%s','%s','%s','%s',%d,'%s','%s')" % (cityCode,cityName,spellName.encode("utf-8"),customCode,parentCode,parentId,date,bz);
			cursor.execute(sql);
			# Remember the id of the row just inserted as the parent id
			# for the rows that follow.
			# NOTE(review): parentId is only reassigned after a province
			# or city insert and is reset to 0 only on a province line, so
			# a city line that follows another city (or that city's
			# counties) appears to be stored with the previous city's id
			# rather than the province's -- confirm against the data.
			if(spaceCount == 4):
				parentId = conn.insert_id();
			elif(spaceCount == 6):
				parentId = conn.insert_id();
			conn.commit();
			
			lastSpaceCount = spaceCount;

		file.close();
		cursor.close();
		conn.close();
	except (mdb.Error, IOError), e:
		print "Error %d: %s" % (e.args[0], e.args[1]);
		sys.exit(1);

コード例 #59

0

ファイルを表示

ファイル: pinyinTest.py プロジェクト: crusaderg/MyPythonStudy

from xpinyin import Pinyin

pinyinLib = Pinyin()

# For each source line, write the tone-marked pinyin of every word,
# followed by a row of parentheses as wide as each pinyin entry.
with open( 'D:\\text.txt', 'r' ) as srcFile, open( 'D:\\pinyin.txt', 'w' ) as desFile:
	for line in srcFile:
		parenthesisLength = []
		for word in line.split():
			result = pinyinLib.get_pinyin( word, splitter = '   ', show_tone_marks = True )
			desFile.write( result + '\t\t' )
			parenthesisLength.append( len(result) )

		desFile.write( '\n' )

		for pLength in parenthesisLength:
			# The two brackets take up two of the pLength columns.
			desFile.write( '(' + ' ' * (pLength - 2) + ')' + '\t\t' )

コード例 #60

0

ファイルを表示

ファイル: spider.py プロジェクト: wangqiuyi/0517

                return data

# Rebuild the 'pet' index: fetch every pet of every breed page by page
# and bulk-index them under a doc type derived from the breed's pinyin.
es = ElasticSearch('http://localhost:9200/')
es.delete_index('pet')
spider = Spider()
breeds = spider.getPetBreeds()
p = Pinyin()
for breed in breeds:
    flg = 1
    page = 1
    pet_list = []
    # Keep requesting pages until one comes back empty.
    while flg:
        pets = spider.getPets(breed, (page - 1) * spider.limit)
        if not pets:
            flg = 0
            continue
        page += 1
        pet_list.extend(
            {'name': pet['name'], 'img': pet['img'], 'type': breed['ename']}
            for pet in pets
        )
    print(breed['ename'] + '\n')
    if not pet_list:
        continue
    doc_type = p.get_pinyin(breed['ename'].replace('宠物', ''), '')
    es.bulk((es.index_op(pet_obj) for pet_obj in pet_list), doc_type=doc_type, index = 'pet')
es.refresh('pet')