def predict_data_process(trigger_file, role_file, schema_file, save_path):
    """Merge trigger and role predictions into per-sentence event records.

    Every input file is read with ``read_by_lines`` and each line is parsed
    as one JSON object.

    Args:
        trigger_file: Trigger predictions; each record provides ``id``,
            ``text`` and ``pred.labels``.
        role_file: Role predictions with the same record layout.
        schema_file: Event schema; each record provides ``event_type`` and a
            ``role_list`` whose items carry a ``role`` key.
        save_path: Destination handed to ``write_by_lines``; it receives one
            JSON string per trigger record.

    Raises:
        KeyError: If a predicted event type is missing from the schema, or
            if a sentence with predicted events has no role prediction.
    """
    trigger_datas = read_by_lines(trigger_file)
    role_data = read_by_lines(role_file)
    schema_datas = read_by_lines(schema_file)
    print("trigger predict {} load from {}".format(len(trigger_datas),
                                                   trigger_file))
    print("role predict {} load from {}".format(len(role_data), role_file))
    print("schema {} load from {}".format(len(schema_datas), schema_file))

    # event_type -> role names the schema allows for that event type
    schema = {}
    for s in schema_datas:
        d_json = json.loads(s)
        schema[d_json["event_type"]] = [r["role"] for r in d_json["role_list"]]

    # sentence id -> {role_type: [argument text, ...]}
    sent_role_mapping = {}
    for d in role_data:
        d_json = json.loads(d)
        role_ret = {}
        for r in extract_result(d_json["text"], d_json["pred"]["labels"]):
            role_ret.setdefault(r["type"], []).append("".join(r["text"]))
        sent_role_mapping[d_json["id"]] = role_ret

    pred_ret = []
    for d in trigger_datas:
        d_json = json.loads(d)
        t_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        # Sorted rather than list(set(...)): set iteration order of strings
        # changes with hash randomization, which made the saved event order
        # differ from run to run.
        # NOTE(review): assumes event types are mutually comparable (strings).
        pred_event_types = sorted({t["type"] for t in t_ret})
        event_list = []
        for event_type in pred_event_types:
            role_list = schema[event_type]
            arguments = []
            for role_type, ags in sent_role_mapping[d_json["id"]].items():
                # Keep only roles the schema defines for this event type.
                if role_type not in role_list:
                    continue
                for arg in ags:
                    # Arguments of length one are dropped.
                    if len(arg) == 1:
                        continue
                    arguments.append({"role": role_type, "argument": arg})
            event_list.append({"event_type": event_type,
                               "arguments": arguments})
        pred_ret.append({
            "id": d_json["id"],
            "text": d_json["text"],
            "event_list": event_list
        })
    pred_ret = [json.dumps(r, ensure_ascii=False) for r in pred_ret]
    print("submit data {} save to {}".format(len(pred_ret), save_path))
    write_by_lines(save_path, pred_ret)
Exemple #2
0
    def subscribe(self, symbol, func=None, fields="", data_format=""):
        """Subscribe securities.

        Sends a ``jsq.subscribe`` request for ``symbol``. On success the
        codes are added to the local subscribed set and the schema id,
        schema and subscription hash from the response are stored.

        Args:
            symbol: Comma-separated security codes. Per the original note,
                an empty value makes the server return the currently
                subscribed codes (server behaviour, not visible here).
            func: Optional callback; when truthy it replaces the stored
                quote callback.
            fields: Field list forwarded to the server.
            data_format: Accepted but not used here; the response is always
                extracted with an empty data format.

        Returns:
            ``(securities, msg)`` on success, with ``securities`` taken from
            the response. If the session check or the response extraction
            yields a falsy result, that result and its message are returned.
        """
        r, msg = self._check_session()
        if not r:
            return (r, msg)

        if func:
            self._on_jsq_callback = func

        rpc_params = {"symbol": symbol, "fields": fields}

        cr = self._remote.call("jsq.subscribe", rpc_params)

        rsp, msg = utils.extract_result(cr,
                                        data_format="",
                                        class_name="SubRsp")
        if not rsp:
            return (rsp, msg)

        # Strip first, then filter: otherwise whitespace-only tokens such as
        # the middle one in "a, ,b" end up in the subscribed set as "".
        stripped = (x.strip() for x in symbol.split(','))
        new_codes = set(code for code in stripped if code)

        self._subscribed_set = self._subscribed_set.union(new_codes)
        self._schema_id = rsp['schema_id']
        self._schema = rsp['schema']
        self._sub_hash = rsp['sub_hash']
        return (rsp['securities'], msg)
Exemple #3
0
    def _do_subscribe(self):
        """Subscribe again when reconnected or hash_code is not same"""
        if not self._subscribed_set: return

        codes = list(self._subscribed_set)
        codes.sort()

        # XXX subscribe with default fields!
        rpc_params = {"symbol": ",".join(codes), "fields": ""}

        cr = self._remote.call("jsq.subscribe", rpc_params)

        rsp, msg = utils.extract_result(cr,
                                        data_format="",
                                        class_name="SubRsp")
        if not rsp:
            #return (rsp, msg)
            return

        self._schema_id = rsp['schema_id']
        self._schema = rsp['schema']
        self._sub_hash = rsp['sub_hash']
        #return (rsp.securities, msg)

        self._make_schema_map()
Exemple #4
0
    def batch_order(self, orders, algo="", algo_param={}, userdata=""):
        """Submit several orders in one ``oms.batch_order`` request.

        Args:
            orders: Non-empty list or tuple of orders. Items are either
                dicts of the form
                ``{"security": "000001.SZ", "action": "Buy", "price": 10.0, "size": 100}``
                or ``EntrustOrder`` objects (detected from the first item),
                which are converted to such dicts.
            algo: Algorithm name, forwarded unchanged.
            algo_param: Algorithm parameters, serialized with ``json.dumps``.
                The shared default dict is only read, never mutated.
            userdata: Forwarded unchanged.

        Returns:
            ``(result, message)``. ``result`` is ``None`` when ``orders`` is
            empty or not a list/tuple, or when the session check fails;
            ``message`` then contains the error information. Otherwise the
            value of ``utils.extract_result`` for the call result.
        """
        if not orders or not isinstance(orders, (list, tuple)):
            return (None, "empty order")

        if isinstance(orders[0], EntrustOrder):
            # "action" belongs to the documented order format and was
            # missing from the converted dicts.
            # NOTE(review): assumes EntrustOrder exposes `action` next to
            # security/price/size -- confirm against its definition.
            orders = [{
                "security": o.security,
                "action": o.action,
                "price": o.price,
                "size": int(o.size)
            } for o in orders]

        r, msg = self._check_session()
        if not r:
            return (None, msg)

        rpc_params = {
            "orders": orders,
            "algo": algo,
            "algo_param": json.dumps(algo_param),
            "user": self._username,
            "userdata": userdata
        }

        cr = self._remote.call("oms.batch_order", rpc_params)
        return utils.extract_result(cr)
Exemple #5
0
    def place_order(self,
                    security,
                    action,
                    price,
                    size,
                    algo="",
                    algo_param={},
                    userdata=""):
        """Send a single order through ``oms.place_order``.

        ``size`` is converted with ``int`` and ``algo_param`` is serialized
        with ``json.dumps``; the stored username is sent as ``user``.

        Returns:
            ``(result, message)``. ``result`` is ``None`` when the session
            check fails and ``message`` then contains the error information;
            otherwise the value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (None, msg)

        request = {
            "security": security,
            "action": action,
            "price": price,
            "size": int(size),
            "algo": algo,
            "algo_param": json.dumps(algo_param),
            "user": self._username,
            "userdata": userdata
        }

        reply = self._remote.call("oms.place_order", request)
        return utils.extract_result(reply)
Exemple #6
0
    def basket_order(self, orders, algo="", algo_param={}, userdata=""):
        """Submit a basket of orders through ``pms.basket_order``.

        Args:
            orders: Either a list of dicts of the form
                ``{"security": "000001.SZ", "ref_price": 10.0, "inc_size": 100}``
                or a pandas ``DataFrame`` whose index holds the security and
                which has ``ref_price`` and ``inc_size`` columns; a
                DataFrame is converted row by row into such dicts.
            algo: Algorithm name, forwarded unchanged.
            algo_param: Algorithm parameters, serialized with ``json.dumps``.
                The shared default dict is only read, never mutated.
            userdata: Forwarded unchanged.

        Returns:
            ``(result, message)``. When the session check fails the result
            is ``False`` and ``message`` contains the error information;
            otherwise the value of ``utils.extract_result``.
        """
        r, msg = self._check_session()
        if not r:
            return (False, msg)

        if isinstance(orders, pd.DataFrame):
            frame = orders
            # Walk index and columns in lockstep. This is positional, so it
            # does not rely on integer keys falling back to positions, and
            # it avoids the Python-2-only xrange.
            orders = [{
                'security': security,
                'ref_price': float(ref_price),
                "inc_size": int(inc_size)
            } for security, ref_price, inc_size in zip(
                frame.index, frame['ref_price'], frame['inc_size'])]

        rpc_params = {
            "orders": orders,
            "algo": algo,
            "algo_param": json.dumps(algo_param),
            "user": self._username,
            "userdata": userdata
        }

        cr = self._remote.call("pms.basket_order", rpc_params)
        return utils.extract_result(cr)
Exemple #7
0
    def logout(self):
        """Clear the cached login result and send ``auth.logout``.

        Returns the value of ``utils.extract_result`` for the call result.
        """
        self._loggined = None

        # The logout request carries no parameters.
        reply = self._remote.call("auth.logout", {})
        return utils.extract_result(reply)
Exemple #8
0
    def logout(self):
        """
        Log out, e.g. to stop using the data api or to switch users.

        The cached login result is cleared before ``auth.logout`` is sent;
        the value of ``utils.extract_result`` for the call is returned.
        """
        self._loggined = None

        # The logout request carries no parameters.
        reply = self._remote.call("auth.logout", {})
        return utils.extract_result(reply)
Exemple #9
0
    def _do_use_strategy(self):
        if self._strategy_id:
            rpc_params = {"account_id": self._strategy_id}

            cr = self._remote.call("auth.use_strategy", rpc_params)
            r, msg = utils.extract_result(cr)
            self._strategy_selected = r

            return (r, msg)
        else:
            return (False, "-1,no strategy_id was specified")
Exemple #10
0
    def stop_portfolio(self):
        """Send a parameterless ``pms.stop_portfolio`` request.

        Returns:
            ``(result, message)``. When the session check fails the result
            is ``False`` and ``message`` contains the error information;
            otherwise the value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (False, msg)

        reply = self._remote.call("pms.stop_portfolio", {})
        return utils.extract_result(reply)
Exemple #11
0
    def cancel_order(self, task_id):
        """Send ``oms.cancel_order`` for ``task_id``.

        Returns:
            ``(result, message)``. ``result`` is ``None`` when the session
            check fails and ``message`` then contains the error information;
            otherwise the value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (None, msg)

        reply = self._remote.call("oms.cancel_order", {"task_id": task_id})
        return utils.extract_result(reply)
Exemple #12
0
    def use_strategy(self, strategy_id):
        """Select a strategy, or send the query form of the request.

        A truthy ``strategy_id`` is stored and selection is delegated to
        ``_do_use_strategy``. A falsy one sends ``auth.use_strategy`` with
        account id 0 (marked as a query in the original code) and remembers
        the extracted result as the selected strategy.

        Returns:
            A ``(result, message)`` pair.
        """
        if strategy_id:
            self._strategy_id = strategy_id
            return self._do_use_strategy()

        # Query
        reply = self._remote.call("auth.use_strategy", {"account_id": 0})
        selected, msg = utils.extract_result(reply)
        self._strategy_selected = selected

        return (selected, msg)
Exemple #13
0
    def query_universe(self, format=""):
        """Send ``oms.query_universe`` and extract ``UniverseItem`` results.

        The effective format comes from ``_get_format(format, "pandas")``.
        When it is ``"pandas"`` the request asks for the ``columnset``
        format.

        Returns:
            ``(None, message)`` when the session check fails; otherwise the
            value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (None, msg)

        data_format = self._get_format(format, "pandas")
        request = {"format": "columnset"} if data_format == "pandas" else {}

        reply = self._remote.call("oms.query_universe", request)

        return utils.extract_result(reply,
                                    format=data_format,
                                    class_name="UniverseItem")
Exemple #14
0
    def _call_rpc(self, method, data_format, data_class, **kwargs):

        r, msg = self._check_session()
        if not r:
            return (r, msg)

        rpc_params = {}
        for kw in kwargs.items():
            rpc_params[str(kw[0])] = kw[1]

        cr = self._remote.call(method, rpc_params)

        return utils.extract_result(cr,
                                    data_format=data_format,
                                    class_name=data_class)
Exemple #15
0
    def query_repo_contract(self, format=""):
        """Send a parameterless ``oms.query_repo_contract`` request.

        The result is extracted as ``RepoContract`` using the format given
        by ``_get_format(format, "pandas")``; the original note says a
        pandas DataFrame is returned.

        Returns:
            ``(None, message)`` when the session check fails; otherwise the
            value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (None, msg)

        reply = self._remote.call("oms.query_repo_contract", {})

        # Resolved after the remote call, as in the original.
        data_format = self._get_format(format, "pandas")
        return utils.extract_result(reply,
                                    format=data_format,
                                    class_name="RepoContract")
Exemple #16
0
    def _do_login(self, format=""):
        # Shouldn't check connected flag here. ZMQ is a mesageq queue!
        # if !self._connected :
        #    return (False, "-1,no connection")

        if self._username and self._password:
            rpc_params = {
                "username": self._username,
                "password": self._password
            }

            cr = self._remote.call("auth.login", rpc_params)
            f = self._get_format(format, "")
            if f != "obj" and f != "":
                f = ""
            return utils.extract_result(cr, format=f, class_name="UserInfo")
        else:
            return (False, "-1,empty username or password")
Exemple #17
0
    def query_trade(self, task_id=-1, format=""):
        """Send ``oms.query_trade`` and extract ``Trade`` results.

        Args:
            task_id: Task to query; the original note says -1 means all.
            format: Requested format, resolved through
                ``_get_format(format, "pandas")``. When the outcome is
                ``"pandas"`` the request asks for the ``columnset`` format.

        Returns:
            ``(None, message)`` when the session check fails; otherwise the
            value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (None, msg)

        request = {"task_id": task_id}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            request["format"] = "columnset"

        reply = self._remote.call("oms.query_trade", request)

        return utils.extract_result(reply,
                                    format=data_format,
                                    class_name="Trade")
Exemple #18
0
    def query_account(self, format=""):
        """Send ``oms.query_account`` and extract ``Account`` results.

        The effective format comes from ``_get_format(format, "pandas")``.
        When it is ``"pandas"`` the request asks for the ``columnset``
        format.

        Returns:
            ``(None, message)`` when the session check fails; otherwise the
            value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (None, msg)

        data_format = self._get_format(format, "pandas")
        request = {"format": "columnset"} if data_format == "pandas" else {}

        reply = self._remote.call("oms.query_account", request)

        return utils.extract_result(reply,
                                    format=data_format,
                                    class_name="Account")
Exemple #19
0
    def _call_rpc(self, method, data_format, data_class, **kwargs):

        r, msg = self._check_session()
        if not r:
            return (r, msg)

        index_column = None
        rpc_params = {}
        for kw in kwargs.items():
            if str(kw[0]) == "_index_column":
                index_column = kw[1]
            else:
                rpc_params[str(kw[0])] = kw[1]

        cr = self._remote.call(method, rpc_params, timeout=self._timeout)

        return utils.extract_result(cr,
                                    data_format=data_format,
                                    index_column=index_column,
                                    class_name=data_class)
Exemple #20
0
    def _do_login(self):
        # Shouldn't check connected flag here. ZMQ is a mesageq queue!
        # if !self._connected :
        #    return (False, "-1,no connection")

        if self._username and self._password:
            rpc_params = {
                "username": self._username,
                "password": self._password
            }

            cr = self._remote.call("auth.login", rpc_params)
            r, msg = utils.extract_result(cr,
                                          data_format="",
                                          class_name="UserInfo")
            self._loggined = r
            return (r, msg)
        else:
            self._loggined = None
            return (False, "-1,empty username or password")
Exemple #21
0
    def query_net_position(self, mode="all", securities="", format=""):
        """Send ``oms.query_net_position`` and extract ``NetPosition`` results.

        Args:
            mode: Forwarded as the ``mode`` parameter.
            securities: Forwarded as the ``security`` parameter; the
                original note says codes are separated by ",".
            format: Requested format, resolved through
                ``_get_format(format, "pandas")``. When the outcome is
                ``"pandas"`` the request asks for the ``columnset`` format.

        Returns:
            ``(None, message)`` when the session check fails; otherwise the
            value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (None, msg)

        request = {"mode": mode, "security": securities}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            request["format"] = "columnset"

        reply = self._remote.call("oms.query_net_position", request)

        return utils.extract_result(reply,
                                    format=data_format,
                                    class_name="NetPosition")
Exemple #22
0
    def query_portfolio(self, format=""):
        """Send ``pms.query_portfolio`` and extract the result.

        The effective format comes from ``_get_format(format, "pandas")``.
        When it is ``"pandas"`` the request asks for the ``columnset``
        format. The result is extracted as ``NetPosition`` with ``security``
        as the index column.

        Returns:
            ``(None, message)`` when the session check fails; otherwise the
            value of ``utils.extract_result``.
        """
        session_ok, msg = self._check_session()
        if not session_ok:
            return (None, msg)

        data_format = self._get_format(format, "pandas")
        request = {"format": "columnset"} if data_format == "pandas" else {}

        reply = self._remote.call("pms.query_portfolio", request)

        return utils.extract_result(reply,
                                    index_column="security",
                                    format=data_format,
                                    class_name="NetPosition")
Exemple #23
0
def predict_data_process(trigger_file, role_file, schema_file, save_path):
    """Combine trigger and role predictions into per-sentence event lists.

    Each input file is read with ``read_by_lines``; every line is one JSON
    object.

    Args:
        trigger_file: Trigger predictions; records provide ``id``, ``text``
            and ``pred.labels``.
        role_file: Role predictions with the same record layout.
        schema_file: Event schema; records provide ``event_type`` and a
            ``role_list`` whose items carry a ``role`` key.
        save_path: Destination handed to ``write_by_lines``; one JSON string
            is written per trigger record.

    Raises:
        KeyError: If a predicted event type is missing from the schema, or
            if a sentence with predicted events has no role prediction.
    """
    pred_ret = []
    trigger_datas = read_by_lines(trigger_file)
    role_data = read_by_lines(role_file)
    schema_datas = read_by_lines(schema_file)
    print("trigger predict {} load from {}".format(len(trigger_datas),
                                                   trigger_file))
    print("role predict {} load from {}".format(len(role_data), role_file))
    print("schema {} load from {}".format(len(schema_datas), schema_file))

    # Dict keyed by event type: {event_type: [role1, role2, ...]}
    schema = {}
    for s in schema_datas:
        d_json = json.loads(s)
        schema[d_json["event_type"]] = [
            r["role"] for r in d_json["role_list"]
        ]

    # process the role data
    # Per sentence: {id: {role_type: [role_arg1, role_arg2, ...]}}
    sent_role_mapping = {}
    for d in role_data:
        d_json = json.loads(d)
        r_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        role_ret = {}
        # Each predicted argument is a dict with 'start', 'text' and 'type'.
        for r in r_ret:
            # Collect the argument texts under their predicted role.
            role_ret.setdefault(r["type"], []).append("".join(r["text"]))
        sent_role_mapping[d_json["id"]] = role_ret

    for d in trigger_datas:
        d_json = json.loads(d)
        # [{'start': , 'text': , 'type': }, ...]
        t_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        # The predicted trigger types are the event types. A sentence can
        # hold several event types, which is why triggers are found by
        # sequence labelling.
        # Sorted rather than list(set(...)): set iteration order of strings
        # changes with hash randomization, which made the saved event order
        # differ from run to run.
        # NOTE(review): assumes event types are mutually comparable (strings).
        pred_event_types = sorted({t["type"] for t in t_ret})
        event_list = []
        for event_type in pred_event_types:
            role_list = schema[event_type]
            arguments = []
            for role_type, ags in sent_role_mapping[d_json["id"]].items():
                # Drop roles that do not belong to this event type; only
                # roles defined for the event type are of interest.
                if role_type not in role_list:
                    continue
                for arg in ags:
                    # Arguments of length one are dropped.
                    if len(arg) == 1:
                        continue
                    arguments.append({"role": role_type, "argument": arg})
            event = {"event_type": event_type, "arguments": arguments}
            event_list.append(event)
        pred_ret.append({
            "id": d_json["id"],
            "text": d_json["text"],
            "event_list": event_list
        })
    # The saved event_list carries no index information, only event_type
    # and {role, argument} pairs.
    pred_ret = [json.dumps(r, ensure_ascii=False) for r in pred_ret]
    print("submit data {} save to {}".format(len(pred_ret), save_path))
    write_by_lines(save_path, pred_ret)
def predict_data_process(trigger_file, role_file, enum_file, schema_file,
                         save_path):
    """Merge trigger, role and enum predictions into document-level events.

    Each input file is read with ``read_by_lines``; every line is one JSON
    object. Sentence-level predictions are keyed by ``"<id>\\t<sent_id>"``
    and the resulting events are grouped per document ``id``. Each document
    is passed through ``event_normalization`` before being saved.

    Args:
        trigger_file: Trigger predictions; records provide ``id``,
            ``sent_id``, ``text`` and ``pred.labels``.
        role_file: Role predictions with the same record layout.
        enum_file: Enum predictions; records provide ``id``, ``sent_id`` and
            ``pred.label``.
        schema_file: Event schema; records provide ``event_type`` and a
            ``role_list`` whose items carry a ``role`` key.
        save_path: Destination handed to ``write_by_lines``; one JSON string
            is written per document.

    Raises:
        KeyError: If a predicted event type is missing from the schema, or
            if a sentence with predicted events lacks the role (or, for the
            enum event type, enum) prediction it needs.
    """
    pred_ret = []
    trigger_data = read_by_lines(trigger_file)
    role_data = read_by_lines(role_file)
    enum_data = read_by_lines(enum_file)
    schema_data = read_by_lines(schema_file)
    print("trigger predict {} load from {}".format(len(trigger_data),
                                                   trigger_file))
    print("role predict {} load from {}".format(len(role_data), role_file))
    print("enum predict {} load from {}".format(len(enum_data), enum_file))
    print("schema {} load from {}".format(len(schema_data), schema_file))

    schema, sent_role_mapping, sent_enum_mapping = {}, {}, {}
    for s in schema_data:
        d_json = json.loads(s)
        schema[d_json["event_type"]] = [r["role"] for r in d_json["role_list"]]

    # role depends on id and sent_id
    for d in role_data:
        d_json = json.loads(d)
        r_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        role_ret = {}
        for r in r_ret:
            role_ret.setdefault(r["type"], []).append("".join(r["text"]))
        _id = "{}\t{}".format(d_json["id"], d_json["sent_id"])
        sent_role_mapping[_id] = role_ret

    # process the enum_role data
    for d in enum_data:
        d_json = json.loads(d)
        _id = "{}\t{}".format(d_json["id"], d_json["sent_id"])
        sent_enum_mapping[_id] = d_json["pred"]["label"]

    # process trigger data
    for d in trigger_data:
        d_json = json.loads(d)
        t_ret = extract_result(d_json["text"], d_json["pred"]["labels"])
        # Sorted rather than list(set(...)): set iteration order of strings
        # changes with hash randomization, which made the event order
        # differ from run to run.
        # NOTE(review): assumes event types are mutually comparable (strings).
        pred_event_types = sorted({t["type"] for t in t_ret})
        event_list = []
        _id = "{}\t{}".format(d_json["id"], d_json["sent_id"])
        for event_type in pred_event_types:
            role_list = schema[event_type]
            arguments = []
            for role_type, ags in sent_role_mapping[_id].items():
                # Keep only roles the schema defines for this event type.
                if role_type not in role_list:
                    continue
                for arg in ags:
                    arguments.append({"role": role_type, "argument": arg})
            # Special case: the enum event type gets the sentence's enum
            # label attached as an extra argument.
            if event_type == enum_event_type:
                arguments.append({
                    "role": enum_role,
                    "argument": sent_enum_mapping[_id]
                })
            event = {
                "event_type": event_type,
                "arguments": arguments,
                "text": d_json["text"]
            }
            event_list.append(event)
        pred_ret.append({
            "id": d_json["id"],
            "sent_id": d_json["sent_id"],
            "text": d_json["text"],
            "event_list": event_list
        })

    # Group the sentence-level events by document id.
    doc_pred = {}
    for d in pred_ret:
        if d["id"] not in doc_pred:
            doc_pred[d["id"]] = {"id": d["id"], "event_list": []}
        doc_pred[d["id"]]["event_list"].extend(d["event_list"])

    # unify all the prediction results and save them
    doc_pred = [
        json.dumps(event_normalization(r), ensure_ascii=False)
        for r in doc_pred.values()
    ]
    print("submit data {} save to {}".format(len(doc_pred), save_path))
    write_by_lines(save_path, doc_pred)