def predict_data_process(trigger_file, role_file, schema_file, save_path):
    """Merge trigger and role predictions into submission-format records.

    Each input is loaded with ``read_by_lines`` and every element is parsed
    as one JSON document.  For every trigger record the predicted event
    types are paired with the predicted arguments of the same ``id`` whose
    role is listed for that event type in the schema.  One JSON string per
    trigger record is handed to ``write_by_lines``.

    Args:
        trigger_file: source of trigger predictions; each record is read for
            its ``id``, ``text`` and ``pred.labels`` fields.
        role_file: source of role predictions; same fields as above.
        schema_file: source of the schema; each record is read for its
            ``event_type`` and ``role_list`` (items with a ``role`` key).
        save_path: destination passed to ``write_by_lines``.

    Raises:
        KeyError: if a predicted event type is not in the schema, or a
            trigger record with at least one predicted event has no role
            prediction under the same ``id``.
    """
    trigger_datas = read_by_lines(trigger_file)
    role_data = read_by_lines(role_file)
    schema_datas = read_by_lines(schema_file)
    print("trigger predict {} load from {}".format(len(trigger_datas), trigger_file))
    print("role predict {} load from {}".format(len(role_data), role_file))
    print("schema {} load from {}".format(len(schema_datas), schema_file))

    # event_type -> list of role names allowed for that event type
    schema = {}
    for line in schema_datas:
        entry = json.loads(line)
        schema[entry["event_type"]] = [r["role"] for r in entry["role_list"]]

    # process the role data: id -> {role_type: [argument text, ...]}
    sent_role_mapping = {}
    for line in role_data:
        record = json.loads(line)
        role_ret = {}
        for span in extract_result(record["text"], record["pred"]["labels"]):
            role_ret.setdefault(span["type"], []).append("".join(span["text"]))
        sent_role_mapping[record["id"]] = role_ret

    pred_ret = []
    for line in trigger_datas:
        record = json.loads(line)
        t_ret = extract_result(record["text"], record["pred"]["labels"])
        # De-duplicate in first-seen order.  Going through a set made the
        # order of events in the saved file depend on string-hash
        # randomization, i.e. it could differ from one run to the next.
        pred_event_types = list(dict.fromkeys(t["type"] for t in t_ret))
        event_list = []
        for event_type in pred_event_types:
            role_list = schema[event_type]
            arguments = []
            for role_type, ags in sent_role_mapping[record["id"]].items():
                if role_type not in role_list:
                    continue
                for arg in ags:
                    # single-character arguments are dropped
                    if len(arg) == 1:
                        continue
                    arguments.append({"role": role_type, "argument": arg})
            event_list.append({"event_type": event_type, "arguments": arguments})
        pred_ret.append({
            "id": record["id"],
            "text": record["text"],
            "event_list": event_list,
        })

    pred_ret = [json.dumps(r, ensure_ascii=False) for r in pred_ret]
    print("submit data {} save to {}".format(len(pred_ret), save_path))
    write_by_lines(save_path, pred_ret)
def subscribe(self, symbol, func=None, fields="", data_format=""):
    """Subscribe securities.

    This function adds new securities to subscribed list on the server.
    If success, return subscribed codes.
    If securities is empty, return current subscribed codes.

    Args:
        symbol: comma-separated security codes.
        func: optional callback; when truthy it replaces
            ``self._on_jsq_callback`` (only after the session check passed).
        fields: forwarded unchanged as the ``fields`` request parameter.
        data_format: accepted but not used; the response is always
            extracted with ``data_format=""`` because it is indexed by key
            below.

    Returns:
        ``(rsp['securities'], msg)`` on success; otherwise the falsy result
        and message of the failed session check or RPC.
    """
    r, msg = self._check_session()
    if not r:
        return (r, msg)

    if func:
        self._on_jsq_callback = func

    rpc_params = {"symbol": symbol, "fields": fields}
    cr = self._remote.call("jsq.subscribe", rpc_params)
    rsp, msg = utils.extract_result(cr, data_format="", class_name="SubRsp")
    if not rsp:
        return (rsp, msg)

    # Strip first, then drop empties: filtering before stripping let
    # whitespace-only entries ("a, ,b") into the set as "".
    new_codes = {code.strip() for code in symbol.split(',')}
    new_codes.discard("")

    self._subscribed_set = self._subscribed_set.union(new_codes)
    self._schema_id = rsp['schema_id']
    self._schema = rsp['schema']
    self._sub_hash = rsp['sub_hash']
    return (rsp['securities'], msg)
def _do_subscribe(self):
    """Subscribe again when reconnected or hash_code is not same.

    Sends one ``jsq.subscribe`` request containing every code in
    ``self._subscribed_set`` (sorted, comma-joined).  On a truthy response
    the schema id, schema and subscription hash are refreshed and
    ``self._make_schema_map()`` is called.  Always returns ``None``.
    """
    if not self._subscribed_set:
        return

    # XXX subscribe with default fields!
    symbols = ",".join(sorted(self._subscribed_set))
    response = self._remote.call("jsq.subscribe",
                                 {"symbol": symbols, "fields": ""})

    rsp, _msg = utils.extract_result(response, data_format="", class_name="SubRsp")
    if rsp:
        self._schema_id = rsp['schema_id']
        self._schema = rsp['schema']
        self._sub_hash = rsp['sub_hash']
        self._make_schema_map()
def batch_order(self, orders, algo="", algo_param={}, userdata=""):
    """Send several orders in one ``oms.batch_order`` request.

    orders format:
        [ {"security": "000001.SZ", "action": "Buy", "price": 10.0, "size" : 100}, ... ]

    A list/tuple whose first element is an ``EntrustOrder`` is converted to
    that dict format; any other non-empty list/tuple is sent as given.

    Args:
        orders: non-empty list or tuple of order dicts or ``EntrustOrder``.
        algo: forwarded as the ``algo`` request parameter.
        algo_param: JSON-encoded into the ``algo_param`` request parameter.
            The shared default dict is only read, never mutated.
        userdata: forwarded as the ``userdata`` request parameter.

    Returns:
        (result, message)
        if result is None, message contains error information
    """
    if not orders or not isinstance(orders, (list, tuple)):
        return (None, "empty order")

    if isinstance(orders[0], EntrustOrder):
        # "action" is part of the documented order format; it was
        # previously left out of the conversion, so converted orders
        # reached the server without a side.
        orders = [
            {
                "security": o.security,
                "action": o.action,
                "price": o.price,
                "size": int(o.size),
            }
            for o in orders
        ]

    r, msg = self._check_session()
    if not r:
        return (None, msg)

    rpc_params = {
        "orders": orders,
        "algo": algo,
        "algo_param": json.dumps(algo_param),
        "user": self._username,
        "userdata": userdata,
    }
    cr = self._remote.call("oms.batch_order", rpc_params)
    return utils.extract_result(cr)
def place_order(self, security, action, price, size, algo="", algo_param={}, userdata=""):
    """Submit a single order through the ``oms.place_order`` RPC.

    ``size`` is coerced with ``int`` and ``algo_param`` is JSON-encoded
    before sending; ``self._username`` is sent as ``user``.

    return (result, message)
    if result is None, message contains error information
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (None, msg)

    request = dict(
        security=security,
        action=action,
        price=price,
        size=int(size),
        algo=algo,
        algo_param=json.dumps(algo_param),
        user=self._username,
        userdata=userdata,
    )
    response = self._remote.call("oms.place_order", request)
    return utils.extract_result(response)
def basket_order(self, orders, algo="", algo_param={}, userdata=""):
    """Send a basket of orders through the ``pms.basket_order`` RPC.

    orders format:
        [ {"security": "000001.SZ", "ref_price": 10.0, "inc_size" : 100}, ...]

    A pandas ``DataFrame`` is also accepted: its index supplies the
    security codes and its ``ref_price`` / ``inc_size`` columns supply the
    values (converted with ``float`` / ``int``).

    Args:
        orders: list of order dicts, or a DataFrame as described above.
        algo: forwarded as the ``algo`` request parameter.
        algo_param: JSON-encoded into the ``algo_param`` request parameter.
            The shared default dict is only read, never mutated.
        userdata: forwarded as the ``userdata`` request parameter.

    Returns:
        (result, message).  When the session check fails the result is
        ``False`` and message contains the error information.
    """
    r, msg = self._check_session()
    if not r:
        return (False, msg)

    if isinstance(orders, pd.DataFrame):
        # Walk rows by position: ``xrange`` does not exist on Python 3, and
        # ``series[i]`` is label-based on integer indexes (and deprecated as
        # a positional fallback otherwise), so use ``.iloc`` explicitly.
        orders = [
            {
                'security': security,
                'ref_price': float(orders['ref_price'].iloc[pos]),
                "inc_size": int(orders['inc_size'].iloc[pos]),
            }
            for pos, security in enumerate(orders.index)
        ]

    rpc_params = {
        "orders": orders,
        "algo": algo,
        "algo_param": json.dumps(algo_param),
        "user": self._username,
        "userdata": userdata,
    }
    cr = self._remote.call("pms.basket_order", rpc_params)
    return utils.extract_result(cr)
def logout(self):
    """Clear the cached login result and call the ``auth.logout`` RPC.

    Returns whatever ``utils.extract_result`` yields for the response.
    """
    self._loggined = None
    response = self._remote.call("auth.logout", {})
    return utils.extract_result(response)
def logout(self):
    """
    Logout to stop using the data api or switch users.

    Resets ``self._loggined`` to ``None`` before sending ``auth.logout``
    with no parameters, and returns the extracted RPC result.
    """
    self._loggined = None
    reply = self._remote.call("auth.logout", {})
    return utils.extract_result(reply)
def _do_use_strategy(self):
    """Select ``self._strategy_id`` on the server via ``auth.use_strategy``.

    The extracted result is stored in ``self._strategy_selected``.

    Returns:
        ``(result, message)`` from the RPC, or
        ``(False, "-1,no strategy_id was specified")`` when no strategy id
        is set.
    """
    if not self._strategy_id:
        return (False, "-1,no strategy_id was specified")

    response = self._remote.call("auth.use_strategy",
                                 {"account_id": self._strategy_id})
    result, msg = utils.extract_result(response)
    self._strategy_selected = result
    return (result, msg)
def stop_portfolio(self):
    """Call the ``pms.stop_portfolio`` RPC with no parameters.

    return (result, message)
    When the session check fails, result is ``False`` and message contains
    error information.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (False, msg)

    response = self._remote.call("pms.stop_portfolio", {})
    return utils.extract_result(response)
def cancel_order(self, task_id):
    """Request cancellation of ``task_id`` through ``oms.cancel_order``.

    return (result, message)
    if result is None, message contains error information
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (None, msg)

    response = self._remote.call("oms.cancel_order", {"task_id": task_id})
    return utils.extract_result(response)
def use_strategy(self, strategy_id):
    """Select a strategy, or query with account id 0 when none is given.

    A truthy ``strategy_id`` is remembered in ``self._strategy_id`` and the
    call is delegated to ``self._do_use_strategy()``.  A falsy one sends
    ``auth.use_strategy`` with ``account_id`` 0 and stores the extracted
    result in ``self._strategy_selected``.

    Returns:
        ``(result, message)``.
    """
    if not strategy_id:
        # Query
        response = self._remote.call("auth.use_strategy", {"account_id": 0})
        result, msg = utils.extract_result(response)
        self._strategy_selected = result
        return (result, msg)

    self._strategy_id = strategy_id
    return self._do_use_strategy()
def query_universe(self, format=""):
    """Query the universe through the ``oms.query_universe`` RPC.

    The output format is resolved with ``self._get_format(format,
    "pandas")``; when it resolves to ``"pandas"`` the server is asked for
    the ``columnset`` layout.

    Returns:
        ``(None, message)`` when the session check fails, otherwise the
        result of ``utils.extract_result`` with class name
        ``"UniverseItem"``.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (None, msg)

    data_format = self._get_format(format, "pandas")
    rpc_params = {"format": "columnset"} if data_format == "pandas" else {}

    response = self._remote.call("oms.query_universe", rpc_params)
    return utils.extract_result(response,
                                format=data_format,
                                class_name="UniverseItem")
def _call_rpc(self, method, data_format, data_class, **kwargs):
    """Run ``method`` remotely with ``kwargs`` as its parameters.

    Args:
        method: RPC method name passed to ``self._remote.call``.
        data_format: forwarded to ``utils.extract_result``.
        data_class: forwarded to ``utils.extract_result`` as ``class_name``.
        **kwargs: request parameters; keys are passed through ``str``.

    Returns:
        The falsy session result and its message when the session check
        fails, otherwise the result of ``utils.extract_result``.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (session_ok, msg)

    rpc_params = {str(name): value for name, value in kwargs.items()}
    response = self._remote.call(method, rpc_params)
    return utils.extract_result(response,
                                data_format=data_format,
                                class_name=data_class)
def query_repo_contract(self, format=""):
    """Query repo contracts through the ``oms.query_repo_contract`` RPC.

    The request carries no parameters.  The output format is resolved with
    ``self._get_format(format, "pandas")`` after the call returns (the
    original docstring describes the result as a pandas DataFrame).

    Returns:
        ``(None, message)`` when the session check fails, otherwise the
        result of ``utils.extract_result`` with class name
        ``"RepoContract"``.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (None, msg)

    response = self._remote.call("oms.query_repo_contract", {})
    data_format = self._get_format(format, "pandas")
    return utils.extract_result(response,
                                format=data_format,
                                class_name="RepoContract")
def _do_login(self, format=""):
    """Log in with the stored username and password via ``auth.login``.

    The format resolved by ``self._get_format(format, "")`` is kept only
    when it is ``"obj"`` or ``""``; any other value is replaced by ``""``.

    Returns:
        ``(False, "-1,empty username or password")`` when either credential
        is falsy, otherwise the result of ``utils.extract_result`` with
        class name ``"UserInfo"``.
    """
    # Shouldn't check connected flag here. ZMQ is a message queue!
    if not (self._username and self._password):
        return (False, "-1,empty username or password")

    credentials = {
        "username": self._username,
        "password": self._password,
    }
    response = self._remote.call("auth.login", credentials)

    fmt = self._get_format(format, "")
    if fmt not in ("obj", ""):
        fmt = ""
    return utils.extract_result(response, format=fmt, class_name="UserInfo")
def query_trade(self, task_id=-1, format=""):
    """Query trades through the ``oms.query_trade`` RPC.

    task_id: -1 -- all

    The output format is resolved with ``self._get_format(format,
    "pandas")``; when it resolves to ``"pandas"`` the server is asked for
    the ``columnset`` layout (the original docstring describes the result
    as a pandas DataFrame).

    Returns:
        ``(None, message)`` when the session check fails, otherwise the
        result of ``utils.extract_result`` with class name ``"Trade"``.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (None, msg)

    data_format = self._get_format(format, "pandas")
    rpc_params = {"task_id": task_id}
    if data_format == "pandas":
        rpc_params.update(format="columnset")

    response = self._remote.call("oms.query_trade", rpc_params)
    return utils.extract_result(response, format=data_format, class_name="Trade")
def query_account(self, format=""):
    """Query accounts through the ``oms.query_account`` RPC.

    The output format is resolved with ``self._get_format(format,
    "pandas")``; when it resolves to ``"pandas"`` the server is asked for
    the ``columnset`` layout (the original docstring describes the result
    as a pandas DataFrame).

    Returns:
        ``(None, message)`` when the session check fails, otherwise the
        result of ``utils.extract_result`` with class name ``"Account"``.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (None, msg)

    data_format = self._get_format(format, "pandas")
    rpc_params = {"format": "columnset"} if data_format == "pandas" else {}

    response = self._remote.call("oms.query_account", rpc_params)
    return utils.extract_result(response, format=data_format, class_name="Account")
def _call_rpc(self, method, data_format, data_class, **kwargs):
    """Run ``method`` remotely with ``kwargs`` as its parameters.

    The keyword ``_index_column`` is not sent to the server; its value is
    forwarded to ``utils.extract_result`` as ``index_column`` instead
    (``None`` when absent).  The call uses ``self._timeout``.

    Args:
        method: RPC method name passed to ``self._remote.call``.
        data_format: forwarded to ``utils.extract_result``.
        data_class: forwarded to ``utils.extract_result`` as ``class_name``.
        **kwargs: request parameters; keys are passed through ``str``.

    Returns:
        The falsy session result and its message when the session check
        fails, otherwise the result of ``utils.extract_result``.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (session_ok, msg)

    index_column = kwargs.pop("_index_column", None)
    rpc_params = {str(name): value for name, value in kwargs.items()}

    response = self._remote.call(method, rpc_params, timeout=self._timeout)
    return utils.extract_result(response,
                                data_format=data_format,
                                index_column=index_column,
                                class_name=data_class)
def _do_login(self):
    """Log in with the stored username and password via ``auth.login``.

    The extracted result is cached in ``self._loggined``; it is reset to
    ``None`` when either credential is falsy.

    Returns:
        ``(result, message)`` from the RPC, or
        ``(False, "-1,empty username or password")`` when credentials are
        missing.
    """
    # Shouldn't check connected flag here. ZMQ is a message queue!
    if not (self._username and self._password):
        self._loggined = None
        return (False, "-1,empty username or password")

    credentials = {
        "username": self._username,
        "password": self._password,
    }
    response = self._remote.call("auth.login", credentials)
    result, msg = utils.extract_result(response, data_format="", class_name="UserInfo")
    self._loggined = result
    return (result, msg)
def query_net_position(self, mode="all", securities="", format=""):
    """Query net positions through the ``oms.query_net_position`` RPC.

    securities: separate by ","

    ``mode`` is sent as ``mode`` and ``securities`` as ``security``.  The
    output format is resolved with ``self._get_format(format, "pandas")``;
    when it resolves to ``"pandas"`` the server is asked for the
    ``columnset`` layout (the original docstring describes the result as a
    pandas DataFrame).

    Returns:
        ``(None, message)`` when the session check fails, otherwise the
        result of ``utils.extract_result`` with class name
        ``"NetPosition"``.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (None, msg)

    data_format = self._get_format(format, "pandas")
    rpc_params = {"mode": mode, "security": securities}
    if data_format == "pandas":
        rpc_params.update(format="columnset")

    response = self._remote.call("oms.query_net_position", rpc_params)
    return utils.extract_result(response,
                                format=data_format,
                                class_name="NetPosition")
def query_portfolio(self, format=""):
    """Query the portfolio through the ``pms.query_portfolio`` RPC.

    The output format is resolved with ``self._get_format(format,
    "pandas")``; when it resolves to ``"pandas"`` the server is asked for
    the ``columnset`` layout.  The result is extracted with
    ``index_column="security"`` (the original docstring describes it as a
    pandas DataFrame).

    Returns:
        ``(None, message)`` when the session check fails, otherwise the
        result of ``utils.extract_result`` with class name
        ``"NetPosition"``.
    """
    session_ok, msg = self._check_session()
    if not session_ok:
        return (None, msg)

    data_format = self._get_format(format, "pandas")
    rpc_params = {"format": "columnset"} if data_format == "pandas" else {}

    response = self._remote.call("pms.query_portfolio", rpc_params)
    return utils.extract_result(response,
                                index_column="security",
                                format=data_format,
                                class_name="NetPosition")
def predict_data_process(trigger_file, role_file, schema_file, save_path):
    """Merge trigger and role predictions into submission-format records.

    Each input is loaded with ``read_by_lines`` and every element is parsed
    as one JSON document.  For every trigger record the predicted event
    types are paired with the predicted arguments of the same ``id`` whose
    role is listed for that event type in the schema.  One JSON string per
    trigger record is handed to ``write_by_lines``.

    Args:
        trigger_file: source of trigger predictions; each record is read for
            its ``id``, ``text`` and ``pred.labels`` fields.
        role_file: source of role predictions; same fields as above.
        schema_file: source of the schema; each record is read for its
            ``event_type`` and ``role_list`` (items with a ``role`` key).
        save_path: destination passed to ``write_by_lines``.

    Raises:
        KeyError: if a predicted event type is not in the schema, or a
            trigger record with at least one predicted event has no role
            prediction under the same ``id``.
    """
    pred_ret = []
    trigger_datas = read_by_lines(trigger_file)
    role_data = read_by_lines(role_file)
    schema_datas = read_by_lines(schema_file)
    print("trigger predict {} load from {}".format(len(trigger_datas), trigger_file))
    print("role predict {} load from {}".format(len(role_data), role_file))
    print("schema {} load from {}".format(len(schema_datas), schema_file))

    # Dictionary keyed by event type: {event_type: [role1, role2, ...]}
    schema = {}
    for s in schema_datas:
        d_json = json.loads(s)
        schema[d_json["event_type"]] = [r["role"] for r in d_json["role_list"]]

    # process the role data
    sent_role_mapping = {}
    for d in role_data:
        d_json = json.loads(d)
        r_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        role_ret = {}
        # Each predicted argument span is a dict; its 'type' and 'text'
        # entries are used here (the original comment also lists 'start').
        for r in r_ret:
            # Collect the argument strings per predicted role:
            # {role_type: [role_arg1, role_arg2, ...]}
            role_ret.setdefault(r["type"], []).append("".join(r["text"]))
        # For the current sentence: {id: {role_type: [role_arg1, ...]}}
        sent_role_mapping[d_json["id"]] = role_ret

    for d in trigger_datas:
        d_json = json.loads(d)
        # [{'start': , 'text': , 'type': }, ...]
        t_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        # The predicted trigger types are the event types.  Event types are
        # obtained from trigger words, and one sentence can carry several
        # of them, which is why sequence labeling is used.
        # De-duplicate in first-seen order: going through a set made the
        # event order in the saved file depend on string-hash randomization.
        pred_event_types = list(dict.fromkeys(t["type"] for t in t_ret))
        event_list = []
        for event_type in pred_event_types:
            role_list = schema[event_type]
            arguments = []
            for role_type, ags in sent_role_mapping[d_json["id"]].items():
                # Skip arguments whose role does not belong to this event
                # type; only roles defined for the event type are of
                # interest.
                if role_type not in role_list:
                    continue
                for arg in ags:
                    # single-character arguments are dropped
                    if len(arg) == 1:
                        continue
                    arguments.append({"role": role_type, "argument": arg})
            event = {"event_type": event_type, "arguments": arguments}
            event_list.append(event)
        pred_ret.append({
            "id": d_json["id"],
            "text": d_json["text"],
            "event_list": event_list,
        })

    # The final event_list carries no index, only event_type and
    # {role, argument} pairs.
    pred_ret = [json.dumps(r, ensure_ascii=False) for r in pred_ret]
    print("submit data {} save to {}".format(len(pred_ret), save_path))
    write_by_lines(save_path, pred_ret)
def predict_data_process(trigger_file, role_file, enum_file, schema_file, save_path):
    """Merge trigger, role and enum predictions into per-document records.

    Each input is loaded with ``read_by_lines`` and every element is parsed
    as one JSON document.  Role and enum predictions are keyed by
    ``"<id>\\t<sent_id>"``.  For every trigger record the predicted event
    types are paired with the arguments of the same sentence whose role is
    listed for that event type in the schema; events of type
    ``enum_event_type`` additionally get an ``enum_role`` argument taken
    from the enum prediction.  Sentence-level events are then grouped by
    document ``id``, passed through ``event_normalization`` and handed to
    ``write_by_lines`` as JSON strings.

    Args:
        trigger_file: source of trigger predictions; each record is read for
            ``id``, ``sent_id``, ``text`` and ``pred.labels``.
        role_file: source of role predictions; same fields as above.
        enum_file: source of enum predictions; each record is read for
            ``id``, ``sent_id`` and ``pred.label``.
        schema_file: source of the schema; each record is read for
            ``event_type`` and ``role_list`` (items with a ``role`` key).
        save_path: destination passed to ``write_by_lines``.

    Raises:
        KeyError: if a predicted event type is not in the schema, or a
            sentence with a predicted event lacks the role (or, for the
            enum event type, enum) prediction it needs.
    """
    pred_ret = []
    trigger_data = read_by_lines(trigger_file)
    role_data = read_by_lines(role_file)
    enum_data = read_by_lines(enum_file)
    schema_data = read_by_lines(schema_file)
    print("trigger predict {} load from {}".format(len(trigger_data), trigger_file))
    print("role predict {} load from {}".format(len(role_data), role_file))
    print("enum predict {} load from {}".format(len(enum_data), enum_file))
    print("schema {} load from {}".format(len(schema_data), schema_file))

    schema, sent_role_mapping, sent_enum_mapping = {}, {}, {}
    for s in schema_data:
        d_json = json.loads(s)
        schema[d_json["event_type"]] = [r["role"] for r in d_json["role_list"]]

    # role depends on id and sent_id
    for d in role_data:
        d_json = json.loads(d)
        r_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        role_ret = {}
        for r in r_ret:
            role_ret.setdefault(r["type"], []).append("".join(r["text"]))
        _id = "{}\t{}".format(d_json["id"], d_json["sent_id"])
        sent_role_mapping[_id] = role_ret

    # process the enum_role data
    for d in enum_data:
        d_json = json.loads(d)
        _id = "{}\t{}".format(d_json["id"], d_json["sent_id"])
        sent_enum_mapping[_id] = d_json["pred"]["label"]

    # process trigger data
    for d in trigger_data:
        d_json = json.loads(d)
        t_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        # De-duplicate in first-seen order: going through a set made the
        # event order in the saved file depend on string-hash randomization.
        pred_event_types = list(dict.fromkeys(t["type"] for t in t_ret))
        event_list = []
        _id = "{}\t{}".format(d_json["id"], d_json["sent_id"])
        for event_type in pred_event_types:
            role_list = schema[event_type]
            arguments = []
            for role_type, ags in sent_role_mapping[_id].items():
                if role_type not in role_list:
                    continue
                for arg in ags:
                    arguments.append({"role": role_type, "argument": arg})
            # Special-case handling: the enum event type also receives the
            # sentence's enum prediction as an extra argument.
            if event_type == enum_event_type:
                arguments.append({
                    "role": enum_role,
                    "argument": sent_enum_mapping[_id],
                })
            event_list.append({
                "event_type": event_type,
                "arguments": arguments,
                "text": d_json["text"],
            })
        pred_ret.append({
            "id": d_json["id"],
            "sent_id": d_json["sent_id"],
            "text": d_json["text"],
            "event_list": event_list,
        })

    # Group sentence-level events by document id (first-seen order).
    doc_pred = {}
    for d in pred_ret:
        doc = doc_pred.setdefault(d["id"], {"id": d["id"], "event_list": []})
        doc["event_list"].extend(d["event_list"])

    # unify all the prediction results and save them
    doc_pred = [
        json.dumps(event_normalization(r), ensure_ascii=False)
        for r in doc_pred.values()
    ]
    print("submit data {} save to {}".format(len(doc_pred), save_path))
    write_by_lines(save_path, doc_pred)