def render_plain(self, fp, queries): queries_list = list(queries.keys()) queries_list.sort() for query in queries_list: for test in queries[query]: fp.write(u"-- %s\n" % unicode(test)) fp.write(unicode(query.replace('\n', ' ').strip() + '\n'))
def process_data(self, strs, type=None):
    """Parse one or more GBK-encoded, tab-separated payloads into DataFrames.

    Parameters
    ----------
    strs : str or list of str
        Raw response payload(s) from the trading API; each is GBK-encoded
        tab-separated text.
    type : str, optional
        ``'order'`` or ``'quote'`` requests a single merged DataFrame;
        any other value (including None) returns the parsed result(s).
        (Name shadows the builtin but is kept for caller compatibility.)

    Returns
    -------
    pd.DataFrame or list
        For 'order'/'quote': one (possibly empty) concatenated DataFrame.
        Otherwise: the single parsed frame when exactly one payload parsed,
        else the list of parsed frames.
    """
    if not isinstance(strs, list):
        strs = [strs]
    frames = []
    for s in strs:
        try:
            frames.append(pd.read_csv(
                StringIO(unicode(s.decode('gbk'))),
                sep="\t",
                dtype={"证券代码": str, "证券数量": int, "可卖数量": int}))
        except Exception:
            # Best effort: a malformed payload is skipped rather than
            # aborting the whole batch (original behavior preserved).
            pass
    # BUG FIX: the merge branch was nested under `if not type:`, making it
    # unreachable (type cannot be falsy AND equal to 'order'/'quote').
    # 订单或者行情返回合并的 DataFrame (orders/quotes return one merged frame).
    if type == 'order' or type == 'quote':
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames)
    if len(frames) == 1:
        return frames[0]
    return frames
def Association(text):
    """Finds Associations in the text.

    Called from :py:func:`intent` function. Finds the following objects if
    present in the query:

    * Number Association : Uses regular expressions.
    * Time Reference Association: Uses `en_core_web_sm
      <https://spacy.io/usage/models>`_ model from SpaCy for NLP to detect
      time referred objects in query.

    Parameters
    ----------
    text : string
        The query string asked is passed as argument.
        Eg : "COUNTRIES HAVING MROI GREATER THAN 1 FOR LAST 2 YEARS"

    Returns
    -------
    List : A list of tuples
        List of tuples giving all the associations found in the query.
        Eg : [("GREATER THAN",1),("LAST YEARS",2)]
    """
    # Number comparison association.
    # FIX: the original used character classes like "LESS[ER]*", which also
    # matched junk such as "LESSRE THAN"; (?:ER)? matches exactly the two
    # intended forms, and the non-capturing group keeps re.findall returning
    # the whole match instead of the group.
    pattern = (r"GREATER THAN \d+\.?\d*\s?\d*"
               r"|LESS(?:ER)? THAN \d+\.?\d*\s?\d*"
               r"|MORE THAN \d+\.?\d*\s?\d*"
               r"|EQUAL TO \d+\.?\d*\s?\d*"
               r"|SMALL(?:ER)? THAN \d+\.?\d*\s?\d*"
               r"|ABOVE \d+\.?\d*\s?\d*"
               r"|BELOW \d+\.?\d*\s?\d*"
               r"|NOT EQUAL TO \d+\.?\d*\s?\d*")
    numComparison = [numExtract(match) for match in re.findall(pattern, text)]
    # Time association via spaCy NER.
    # NOTE(review): Span.string is deprecated in spaCy 3 (use .text) --
    # kept as-is since the pinned spaCy version is not visible here.
    doc = nlp(unicode(text))
    timeReference = [e.string for e in doc.ents if e.label_ == 'DATE']
    timeReference = [numExtract(match) for match in timeReference]
    # Drop bare year references; keep relative ranges like "LAST 2 YEARS".
    timeReference = [x for x in timeReference
                     if x[1] not in Dimensions['YEAR']]
    return numComparison + timeReference
def slugify(text, delim=u'-'):
    """Generates an slightly worse ASCII-only slug."""
    # NFKD-normalize each word, strip non-ASCII, drop words that vanish.
    ascii_words = (normalize('NFKD', word).encode('ascii', 'ignore')
                   for word in _punct_re.split(text.lower()))
    return unicode(delim.join(word for word in ascii_words if word))
def push_string(s):
    """Encode ``s`` as UTF-16BE for the wire.

    With USE_PYTHON_UNICODE set, unicode input is encoded directly and byte
    strings are first decoded as ISO-8859-1; otherwise the helper module's
    latin-1 converter is used (here ``unicode`` names that helper module).
    """
    if not USE_PYTHON_UNICODE:
        return unicode.latin1_to_unicode(s)
    if type(s) is UnicodeType:
        return s.encode('utf-16be')
    return unicode(s, 'iso-8859-1').encode('utf-16be')
def orders(self):
    """Return today's entrustments as a dict of order_id -> Order.

    Rows whose 报价方式 (quote style) is not "买卖" are skipped; a 买卖标志
    (direction flag) of 1 marks a sell, which negates amount and filled.
    """
    frame, err = self.api.QueryDatas(self.clientID, [TODAY_ENTRUSTMENT])
    frame = self.process_data(frame)
    today_prefix = unicode(pd.to_datetime("today").date())
    result = {}
    for _, row in frame.T.iteritems():
        if row["报价方式"] != "买卖":
            continue
        sign = -1 if row["买卖标志"] == 1 else 1
        result[row["委托编号"]] = Order(
            # TODO timezone, zerorpc can't serialize datetime
            dt=today_prefix + " " + unicode(row["委托时间"]),
            symbol=unicode(row["证券代码"]),
            name=unicode(row["证券名称"], 'utf8'),
            status=unicode(row["状态说明"], 'utf8'),
            price=row["委托价格"],
            amount=sign * row["委托数量"],
            order_id=row["委托编号"],
            average_cost=row["成交价格"],
            filled=sign * row["成交数量"])
    return result
def pop_string(p, len, chars=0):
    """Pop a UTF-16BE string of ``len`` units from the front of buffer ``p``.

    When ``chars == 1``, ``len`` counts 16-bit characters instead of bytes.
    Returns ``(decoded_string, remaining_buffer)``; on a structural decode
    failure the string slot carries the literal text 'Unicode error'.
    (Parameter name ``len`` shadows the builtin but is kept for callers.)
    """
    if chars == 1:
        len = len * 2
    raw, rest = p[:len], p[len:]
    if not USE_PYTHON_UNICODE:
        return (unicode.unicode_to_latin1(raw), rest)
    try:
        # Undecodable units become U+FFFD, the official Unicode
        # replacement character, so only structural errors can raise.
        decoded = unicode(raw, 'utf-16be', 'replace')
    except UnicodeError:
        return ('Unicode error', rest)
    return (decoded, rest)
def predict(self, message):
    """Classify ``message`` and return its category label.

    Works with models trained under either Python 2 or 3: the input is
    normalized to unicode before the vocabulary lookup, and characters
    missing from the vocabulary are silently dropped.
    """
    text = unicode(message)
    char_ids = [self.word_to_id[ch] for ch in text if ch in self.word_to_id]
    feed = {
        self.model.input_x: kr.preprocessing.sequence.pad_sequences(
            [char_ids], self.config.seq_length),
        self.model.keep_prob: 1.0,  # inference: disable dropout
    }
    predicted = self.session.run(self.model.y_pred_cls, feed_dict=feed)
    return self.categories[predicted[0]]
def read_labels(data_file):
    """Split a fastText-style file into sentences and their labels.

    Each line may contain tokens prefixed with ``__label__``; those are
    collected per line as labels while the remaining tokens are re-joined
    into the sentence.  Runs under both Python 2 (decodes UTF-8 bytes) and
    Python 3 (where ``unicode`` raises NameError and lines are used as-is).

    Returns
    -------
    (lines, labels) : parallel lists of sentence strings and per-line
        label lists.
    """
    sentences = []
    all_labels = []
    with open(data_file, 'r') as handle:
        for raw in handle:
            try:
                tokens = unicode(raw, "UTF-8").split()  # Python 2 path
            except NameError:
                tokens = raw.split()                    # Python 3 path
            row_labels = [t for t in tokens if t.startswith("__label__")]
            row_words = [t for t in tokens if not t.startswith("__label__")]
            all_labels.append(row_labels)
            sentences.append(" ".join(row_words))
    return sentences, all_labels
def _transactions(self, start_date, end_date):
    """Fetch executed transactions between ``start_date`` and ``end_date``.

    Dates are 'YYYYMMDD' strings.  When both bounds equal today, the
    intraday query is used and commission is estimated at 0.12% of
    turnover; otherwise the history query supplies itemized fees.

    Returns
    -------
    dict
        Maps 成交编号 (transaction id) -> Transaction.
    """
    today = pd.to_datetime('today')
    today_str = today.strftime('%Y%m%d')
    today_trans = today_str == start_date and today_str == end_date
    if today_trans:
        df, err = self.api.QueryData(self.clientID, TODAY_TRANSACTION)
    else:
        df, err = self.api.QueryHistoryData(
            self.clientID, HISTORY_TRANSACTION, start_date, end_date)
    df = self.process_data(df)
    # Keep only actual buys/sells (drops dividends, transfers, etc.).
    mask = (df["买卖标志.1"] == "证券卖出") | (df["买卖标志.1"] == "证券买入")
    df = df[mask]
    result = {}
    for _, row in df.T.iteritems():
        trans_id = row["成交编号"]  # renamed: `id` shadowed the builtin
        sign = -1 if row["买卖标志"] == 1 else 1
        if today_trans:
            # Intraday reports carry no fee columns; estimate 0.12%.
            commission = row["成交金额"] * 0.0012
            dt = str(today.date()) + " " + row["成交时间"]
        else:
            commission = (row["佣金"] + row["过户费"] + row["印花税"] +
                          row["经手费"] + row["证管费"])
            # BUG FIX: the original statement ended with a stray comma,
            # which silently made dt a 1-tuple for every historical row.
            dt = str(datetime.datetime.strptime(
                str(row["成交日期"]), "%Y%m%d").date()) + " " + row["成交时间"]
        result[trans_id] = Transaction(id=trans_id,
                                       asset=unicode(row["证券代码"]),
                                       amount=sign * row["成交数量"],
                                       dt=dt,
                                       price=row["成交价格"],
                                       order_id=row["委托编号"],
                                       commission=commission)
    return result
# Scratchpad: random password generation plus Python 2 str/unicode
# round-trip demonstrations.
# FIXES: added the missing `import string`; removed the bogus
# `import unicode` (there is no such stdlib module -- in Python 2,
# `unicode` is a builtin) and the stray bare expression
# `string.ascii_letters string.digits`, which was a syntax error.
import random
import string

# 25 random alphanumeric characters, joined by dots.
pwd = ".".join(
    random.choice(string.ascii_letters + string.digits) for _ in range(25))

# pip freeze

# Encoding / Decoding (Python 2 semantics: str is bytes, unicode is text).
s = "hello byte string"
u = unicode(s)
backToBytes = u.encode()

s = "hello normal string"
u = unicode(s, "utf-8")
backToBytes = u.encode("utf-8")

hindiString = u"बिपाशा और करण सिंह को दो टीवी शो होस्ट करने का ऑफर"
strhindiString = "बिपाशा और करण सिंह को दो टीवी शो होस्ट करने का ऑफर"
type(hindiString)      # -> unicode under Python 2
type(strhindiString)   # -> str (bytes) under Python 2
hindiString.encode(encoding='UTF-8', errors='strict')
strhindiString.decode(encoding='UTF-8', errors='strict')
# Scan a TSV of sentence matches and print near-duplicate pairs whose
# unnormalised lengths differ substantially.
# NOTE(review): `f` is reused as both the path and the file handle, and the
# py2-style .decode('utf-8') implies bytes input -- kept as-is.
with open(f) as f:
    for line in f:
        window, line = line.decode('utf-8').split('\t', 1)
        m = SentenceMatch.from_string(line)
        if m.sentence_score < 0.4:
            continue
        s, t = read_info(m.info)
        if s == t:
            continue
        s_nopunct, t_nopunct = first_norm(s), first_norm(t)
        stoks = set(second_norm(s_nopunct))
        ttoks = set(second_norm(t_nopunct))
        # One normalised token set contained in the other?
        if len(stoks & ttoks) not in (len(stoks), len(ttoks)):
            continue
        s_joined = ' '.join(s_nopunct)
        t_joined = ' '.join(t_nopunct)
        if s_joined in t_joined or t_joined in s_joined:
            # plain substring pairs are not interesting
            continue
        # Require a substantial unnormalised length difference.
        ratio = len(s) / len(t)
        if ratio > LENGTH_RATIO or ratio < INV_LR:
            if ratio < 1:
                ratio = 1 / ratio
            print(ratio, unicode(m).encode('utf-8'), sep='\t')
def get_id(self):
    """Return this object's ``id`` coerced to a unicode string.

    NOTE(review): matches the Flask-Login ``get_id`` contract (unicode
    return) -- confirm this class is used as a login user model.
    """
    return unicode(self.id)
def startTest(self, test):
    """Begin capturing SQLAlchemy engine logging for ``test``.

    Stores the test's name on our log handler, remembers the engine
    logger's effective level (so a later hook can restore it), then
    raises verbosity to INFO and attaches the handler.
    """
    self.handler.current_test = unicode(test)
    engine_logger = logging.getLogger('sqlalchemy.engine')
    self.current_level = engine_logger.getEffectiveLevel()
    engine_logger.setLevel(logging.INFO)
    engine_logger.addHandler(self.handler)
# -*- coding:utf-8 -*- from bs4 import BeautifulSoup import unicode #compile(r".*Rarity:") for label in labels: # loop thru all the labels # store the values associated with the labels in card if cardName.match(unicode(repr(label.string))): sibling = label.find_next_sibling(class_="value") assign(card, "name", sibling.string.strip()) elif manaCost.match(unicode(repr( label.string))) and not convManaCost.match( unicode(repr(label.string))): manaString = " " sibling = label.find_next_sibling(class_="value") for manaIcon in sibling.find_all('img'): manaString += manaIcon["alt"] + " " assign(card, "manaCost", manaString) elif types.match(unicode(repr(label.string))): sibling = label.find_next_sibling(class_="value") typestr = sibling.string.strip() assign(card, "types", typestr) elif cardText.match(unicode(repr(label.string))): eff = handleCardText(label.find_next_sibling(class_="value")) assign(card, "effects", eff) elif flavorText.match(unicode(repr(label.string))): sibling = label.find_next_sibling(class_="value") flav = "" for string in sibling.stripped_strings: flav += string