Python unicode 예제들

예제 #1

0

파일 보기

파일: loghandler.py 프로젝트: freshbooks/nose-sqlcapture

 def render_plain(self, fp, queries):
     queries_list = list(queries.keys())
     queries_list.sort()
     for query in queries_list:
         for test in queries[query]:
             fp.write(u"-- %s\n" % unicode(test))
         fp.write(unicode(query.replace('\n', ' ').strip() + '\n'))

예제 #2

0

파일 보기

파일: tdx_client.py 프로젝트: zhaofinance/cn_zipline

    def process_data(self, strs, type=None):
        """Parse tab-separated, GBK-encoded payloads into data frames.

        Parameters
        ----------
        strs : bytes or list of bytes
            One payload or a list of payloads; a single payload is wrapped
            in a list before processing.
        type : str, optional
            When ``'order'`` or ``'quote'`` the parsed frames are merged
            into a single ``DataFrame`` (an empty one if nothing parsed).

        Returns
        -------
        pandas.DataFrame or list of pandas.DataFrame
            The merged frame for ``'order'``/``'quote'``; otherwise the
            only parsed frame when exactly one payload parsed, else the
            list of parsed frames (possibly empty).
        """
        if not isinstance(strs, list):
            strs = [strs]

        frames = []
        for raw in strs:
            try:
                # ``decode`` already yields text, so no further conversion
                # is needed before handing it to the CSV parser.
                frames.append(
                    pd.read_csv(StringIO(raw.decode('gbk')),
                                sep="\t",
                                dtype={
                                    "证券代码": str,
                                    "证券数量": int,
                                    "可卖数量": int
                                }))
            except Exception:
                # Deliberate best effort: a payload that cannot be decoded
                # or parsed is skipped rather than failing the whole call.
                continue

        # Orders and quotes are returned as one merged data frame.  The
        # original guarded this with ``if not type``, which made the branch
        # unreachable.
        if type in ('order', 'quote'):
            if not frames:
                return pd.DataFrame()
            return pd.concat(frames)

        if len(frames) == 1:
            return frames[0]
        return frames

예제 #3

0

파일 보기

def Association(text):
    """Find number-comparison and time-reference associations in a query.

    Two kinds of association are collected:
        * Number association: comparison phrases followed by a number,
          found with a regular expression.
        * Time reference association: entities labelled ``DATE`` by the
          module-level ``nlp`` pipeline (the original documentation names
          spaCy's `en_core_web_sm <https://spacy.io/usage/models>`_ model).

    Parameters
    ----------
    text : string
        The query string.

    Eg : "COUNTRIES HAVING MROI GREATER THAN 1 FOR LAST 2 YEARS"

    Returns
    -------
    List : A list of tuples
        ``numExtract`` results for every number comparison, followed by
        those for every time reference whose second element is not listed
        in ``Dimensions['YEAR']``.

    Eg (from the original documentation) :
        [("GREATER THAN",1),("LAST YEARS",2)]

    """
    # Number Comparison Association.
    # Raw strings keep the regex escapes intact instead of relying on
    # Python passing unknown string escapes through unchanged.
    number = r" \d+\.?\d*\s?\d*"
    keywords = (
        r"GREATER THAN",
        r"LESS[ER]* THAN",
        r"MORE THAN",
        r"EQUAL TO",
        r"SMALL[ER]* THAN",
        r"ABOVE",
        r"BELOW",
        r"NOT EQUAL TO",
    )
    pattern = "|".join(keyword + number for keyword in keywords)
    numComparison = [numExtract(match) for match in re.findall(pattern, text)]

    # Time Association
    doc = nlp(unicode(text))
    timeReference = [
        numExtract(ent.string) for ent in doc.ents if ent.label_ == 'DATE'
    ]
    # Drop references whose extracted value is itself a known year.
    timeReference = [
        ref for ref in timeReference if ref[1] not in Dimensions['YEAR']
    ]
    return numComparison + timeReference

예제 #4

0

파일 보기

def slugify(text, delim=u'-'):
    """Build an ASCII-only slug from ``text``.

    The lower-cased text is split with ``_punct_re``; every piece is
    NFKD-normalized and encoded to ASCII with unencodable characters
    dropped.  Pieces that end up empty are skipped and the remainder is
    joined with ``delim``.
    """
    ascii_pieces = (
        normalize('NFKD', piece).encode('ascii', 'ignore')
        for piece in _punct_re.split(text.lower())
    )
    kept = [piece for piece in ascii_pieces if piece]
    return unicode(delim.join(kept))

예제 #5

0

파일 보기

파일: gale_pack.py 프로젝트: nandub/pygale

def push_string(s):
	"""Encode ``s`` for packing.

	With ``USE_PYTHON_UNICODE`` set, ``s`` is returned encoded as UTF-16BE;
	anything whose type is not exactly ``UnicodeType`` is first decoded as
	ISO-8859-1.  Otherwise the work is delegated to
	``unicode.latin1_to_unicode``.
	"""
	if not USE_PYTHON_UNICODE:
		# NOTE(review): here ``unicode`` is presumably a project module
		# rather than the builtin -- confirm against the file's imports.
		return unicode.latin1_to_unicode(s)
	# Exact type comparison is kept as in the original (no isinstance).
	text = s if type(s) is UnicodeType else unicode(s, 'iso-8859-1')
	return text.encode('utf-16be')

예제 #6

0

파일 보기

파일: tdx_client.py 프로젝트: zhaofinance/cn_zipline

    def orders(self):
        """Return today's entrusted orders keyed by order id.

        Queries ``TODAY_ENTRUSTMENT`` through ``self.api.QueryDatas``,
        parses the response with ``process_data`` and builds one ``Order``
        per row.  Rows whose "报价方式" column is not "买卖" are skipped.
        Amount and filled quantity are negated when "买卖标志" equals 1.
        """
        raw, _err = self.api.QueryDatas(self.clientID, [TODAY_ENTRUSTMENT])
        frame = self.process_data(raw)

        result = {}
        # Columns of the transposed frame are the rows of the original one.
        for _, row in frame.T.iteritems():
            if row["报价方式"] == "买卖":
                order_id = row["委托编号"]
                sign = -1 if row["买卖标志"] == 1 else 1
                # TODO timezone, zerorpc can't serialize datetime
                placed_at = (unicode(pd.to_datetime("today").date()) + " " +
                             unicode(row["委托时间"]))
                result[order_id] = Order(
                    dt=placed_at,
                    symbol=unicode(row["证券代码"]),
                    name=unicode(row["证券名称"], 'utf8'),
                    status=unicode(row["状态说明"], 'utf8'),
                    price=row["委托价格"],
                    amount=sign * row["委托数量"],
                    order_id=row["委托编号"],
                    average_cost=row["成交价格"],
                    filled=sign * row["成交数量"])

        return result

예제 #7

0

파일 보기

파일: gale_pack.py 프로젝트: nandub/pygale

def pop_string(p, len, chars=0):
	"""Pop a string from the front of ``p``.

	Returns a ``(value, remainder)`` tuple, where ``remainder`` is ``p``
	with the consumed prefix removed.  When ``chars`` is 1, ``len`` is
	doubled before slicing.  With ``USE_PYTHON_UNICODE`` set the prefix is
	decoded as UTF-16BE; otherwise it is passed to
	``unicode.unicode_to_latin1``.
	"""
	size = len * 2 if chars == 1 else len
	head = p[:size]
	if not USE_PYTHON_UNICODE:
		return (unicode.unicode_to_latin1(head), p[size:])
	try:
		# Unknown characters are replaced with the official Unicode
		# replacement character, U+FFFD.
		decoded = unicode(head, 'utf-16be', 'replace')
	except UnicodeError:
		return ('Unicode error', p[size:])
	return (decoded, p[size:])

예제 #8

0

파일 보기

    def predict(self, message):
        """Classify ``message`` and return the matching category.

        Each character of the message that is present in
        ``self.word_to_id`` is mapped to its id; the id sequence is padded
        to ``self.config.seq_length`` and fed to the model with
        ``keep_prob`` set to 1.0.  The first predicted class index selects
        the entry of ``self.categories`` that is returned.
        """
        # Lets a model trained under Python 2 or Python 3 run in either
        # environment.
        content = unicode(message)
        token_ids = []
        for char in content:
            if char in self.word_to_id:
                token_ids.append(self.word_to_id[char])

        padded = kr.preprocessing.sequence.pad_sequences(
            [token_ids], self.config.seq_length)
        feed_dict = {
            self.model.input_x: padded,
            self.model.keep_prob: 1.0,
        }

        predicted = self.session.run(self.model.y_pred_cls,
                                     feed_dict=feed_dict)
        return self.categories[predicted[0]]

예제 #9

0

파일 보기

def read_labels(data_file):
    """Read a labelled text file and separate labels from words.

    The file is decoded as UTF-8.  Each line is split on whitespace;
    tokens starting with ``__label__`` are collected as that line's labels
    and the remaining tokens are re-joined with single spaces.

    Parameters
    ----------
    data_file : str
        Path of the file to read.

    Returns
    -------
    tuple
        ``(lines, labels)``: ``lines[i]`` is the label-free text of line
        ``i`` and ``labels[i]`` is the list of its label tokens.  Blank
        lines yield ``""`` and ``[]``.
    """
    # Local import keeps this block self-contained.  ``io.open`` decodes
    # UTF-8 on both Python 2 and Python 3, replacing the original
    # ``unicode``/``NameError`` fallback and the locale-dependent default
    # encoding of the builtin ``open``.
    import io

    labels = []
    lines = []
    with io.open(data_file, 'r', encoding='utf-8') as f:
        for line in f:
            labels_line = []
            words_line = []
            for word in line.split():
                if word.startswith("__label__"):
                    labels_line.append(word)
                else:
                    words_line.append(word)
            labels.append(labels_line)
            lines.append(" ".join(words_line))
    return lines, labels

예제 #10

0

파일 보기

파일: tdx_client.py 프로젝트: zhaofinance/cn_zipline

    def _transactions(self, start_date, end_date):
        """Return transactions between two dates, keyed by transaction id.

        When both ``start_date`` and ``end_date`` equal today's date
        (formatted ``%Y%m%d``) the data comes from ``QueryData`` with
        ``TODAY_TRANSACTION``; otherwise from ``QueryHistoryData`` with
        ``HISTORY_TRANSACTION``, keeping only rows whose "买卖标志.1" column
        is "证券卖出" or "证券买入".

        Parameters
        ----------
        start_date, end_date : str
            Dates compared against today's ``%Y%m%d`` string and passed to
            the history query.

        Returns
        -------
        dict
            Maps each "成交编号" value to a ``Transaction``.
        """
        today = pd.to_datetime('today')
        today_str = today.strftime('%Y%m%d')

        today_trans = today_str == start_date and today_str == end_date
        if today_trans:
            df, _err = self.api.QueryData(self.clientID, TODAY_TRANSACTION)
            df = self.process_data(df)
        else:
            df, _err = self.api.QueryHistoryData(self.clientID,
                                                 HISTORY_TRANSACTION,
                                                 start_date, end_date)
            df = self.process_data(df)
            mask = (df["买卖标志.1"] == "证券卖出") | (df["买卖标志.1"] == "证券买入")
            df = df[mask]

        rt = {}
        # Columns of the transposed frame are the rows of the original one.
        for _, row in df.T.iteritems():
            trans_id = row["成交编号"]
            sign = -1 if row["买卖标志"] == 1 else 1
            if today_trans:
                # NOTE(review): 0.0012 is presumably an estimated commission
                # rate for same-day records -- confirm.
                commission = row["成交金额"] * 0.0012
                dt = str(today.date()) + " " + row["成交时间"]
            else:
                commission = (row["佣金"] + row["过户费"] + row["印花税"] +
                              row["经手费"] + row["证管费"])
                trade_date = datetime.datetime.strptime(
                    str(row["成交日期"]), "%Y%m%d").date()
                # The original ended this assignment with a stray comma,
                # which turned ``dt`` into a 1-tuple in this branch only.
                dt = str(trade_date) + " " + row["成交时间"]
            rt[trans_id] = Transaction(id=trans_id,
                                       asset=unicode(row["证券代码"]),
                                       amount=sign * row["成交数量"],
                                       dt=dt,
                                       price=row["成交价格"],
                                       order_id=row["委托编号"],
                                       commission=commission)
        return rt

예제 #11

0

파일 보기

파일: Class1_Examples(1).py 프로젝트: Saigurram235/Mastering-Python

import random
import string

# NOTE: the original ``import unicode`` was dropped.  ``unicode`` is a
# Python 2 builtin; importing a module under that name either fails or
# shadows the builtin that is called further down.  ``string`` was used
# without being imported and is now imported above.

# Bare expressions like the ones below only display a value when run in an
# interactive session.
string.ascii_letters

string.digits

# NOTE(review): ``random`` is not a cryptographically secure source; do not
# use this for real passwords.
pwd = ".".join(
    random.choice(string.ascii_letters + string.digits) for _ in range(25))

# pip freeze

# Encoding / Decoding

# Decode a byte string with the default codec, then encode it back.
s = "hello byte string"
u = unicode(s)
backToBytes = u.encode()

# Same round trip with the codec named explicitly.
s = "hello normal string"
u = unicode(s, "utf-8")
backToBytes = u.encode("utf-8")

# A unicode literal and a plain (byte) string literal with the same text.
hindiString = u"बिपाशा और करण सिंह को दो टीवी शो होस्ट करने का ऑफर"
strhindiString = "बिपाशा और करण सिंह को दो टीवी शो होस्ट करने का ऑफर"

type(hindiString)
type(strhindiString)

# unicode -> bytes uses encode(); bytes -> unicode uses decode().
hindiString.encode(encoding='UTF-8', errors='strict')
strhindiString.decode(encoding='UTF-8', errors='strict')

예제 #12

0

파일 보기

파일: find_stem_subsets.py 프로젝트: schwa-lab/gigacluster

    # Scan the match file line by line and print sentence pairs where one
    # side's normalised token set is contained in the other's but the raw
    # lengths differ substantially.
    # NOTE: the file handle rebinds ``f``, shadowing the path it was opened
    # from.
    with open(f) as f:
        for line in f:
            # Each line is "<window>\t<rest>"; only the first tab splits.
            # NOTE(review): ``.decode`` implies lines are byte strings
            # (Python 2 text mode) -- confirm.
            window, line = line.decode('utf-8').split('\t', 1)
            m = SentenceMatch.from_string(line)
            # Skip matches scoring below 0.4.
            if m.sentence_score < 0.4:
                continue
            s, t = read_info(m.info)
            # Identical sides carry no information.
            if s == t:
                continue

            s_nopunct = first_norm(s)
            t_nopunct = first_norm(t)

            s_normed = second_norm(s_nopunct)
            t_normed = second_norm(t_nopunct)
            stoks = set(s_normed)
            ttoks = set(t_normed)

            # The intersection is as large as one of the sets exactly when
            # that set is contained in the other.
            if len(stoks & ttoks) in (len(stoks), len(ttoks)):
                # one is subset of the other
                if ' '.join(s_nopunct) in ' '.join(t_nopunct) or ' '.join(t_nopunct) in ' '.join(s_nopunct):
                    #substring not interesting
                    continue

                # check substantial unnormalised distances
                # NOTE(review): under Python 2 without
                # ``from __future__ import division`` this is integer
                # division, and ``1/ratio`` below could then divide by
                # zero -- confirm against the file header.
                ratio = len(s) / len(t)
                if ratio > LENGTH_RATIO or ratio < INV_LR:
                    # Report the ratio as a value >= 1 either way.
                    if ratio < 1:
                        ratio = 1/ratio
                    print(ratio, unicode(m).encode('utf-8'), sep='\t')

예제 #13

0

파일 보기

파일: lib.py 프로젝트: ecalifornica/happybirthdaysohrob

 def get_id(self):
     """Return this object's ``id`` attribute converted to ``unicode``."""
     identifier = self.id
     return unicode(identifier)

예제 #14

0

파일 보기

파일: plugin.py 프로젝트: freshbooks/nose-sqlcapture

 def startTest(self, test):
     """Attach the capture handler to SQLAlchemy's engine logger.

     Records ``test`` (as unicode) on the handler, stores the logger's
     current effective level in ``self.current_level``, then sets the
     logger to ``INFO`` and adds ``self.handler`` to it.
     """
     self.handler.current_test = unicode(test)
     engine_log = logging.getLogger('sqlalchemy.engine')
     # Keep the level that was in effect before it is changed to INFO.
     previous_level = engine_log.getEffectiveLevel()
     self.current_level = previous_level
     engine_log.setLevel(logging.INFO)
     engine_log.addHandler(self.handler)

예제 #15

0

파일 보기

파일: cardReader.py 프로젝트: edward-coombes/IndependentStudyF18

# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
import unicode
#compile(r".*Rarity:")

for label in labels:
    # loop thru all the labels
    # store the values associated with the labels in card
    if cardName.match(unicode(repr(label.string))):
        sibling = label.find_next_sibling(class_="value")
        assign(card, "name", sibling.string.strip())
    elif manaCost.match(unicode(repr(
            label.string))) and not convManaCost.match(
                unicode(repr(label.string))):
        manaString = " "
        sibling = label.find_next_sibling(class_="value")
        for manaIcon in sibling.find_all('img'):
            manaString += manaIcon["alt"] + " "
        assign(card, "manaCost", manaString)
    elif types.match(unicode(repr(label.string))):
        sibling = label.find_next_sibling(class_="value")
        typestr = sibling.string.strip()
        assign(card, "types", typestr)
    elif cardText.match(unicode(repr(label.string))):
        eff = handleCardText(label.find_next_sibling(class_="value"))
        assign(card, "effects", eff)
    elif flavorText.match(unicode(repr(label.string))):
        sibling = label.find_next_sibling(class_="value")
        flav = ""
        for string in sibling.stripped_strings:
            flav += string