def stream_manager_010003(cls, d_xoi=None): sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, manage_type_amac, type_name_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \ "FROM x_fund_info_private xfi " \ "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_private GROUP BY fund_id) tb_latest " \ "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \ "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010003' AND im.is_used = 1 " \ "LEFT JOIN base.fund_info fi " \ "ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010003' " \ "WHERE fund_name_amac NOT LIKE '%%信托计划'" inp = MysqlInput(ENGINE_C, sql_maxver) inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info") def clean_org_type(mng_tp, tp): # 私募证券投资基金 = 证券投资基金; 其他私募投资基金 = 其他投资基金 if mng_tp == "顾问管理": if tp in {"私募证券投资基金", "证券投资基金"}: return "基金管理人" elif mng_tp == "受托管理": if tp in {"私募证券投资基金", "证券投资基金", "创业投资基金", "其他私募投资基金", "其他投资基金", "股权投资基金"}: return "基金管理人" elif mng_tp == "自我管理": if tp in {"私募证券投资基金", "证券投资基金", "股权投资基金", "创业投资基金", "其他私募投资基金", "其他投资基金"}: return "基金管理人" if d_xoi is None: d_xoi = cls._d_oi_manager() vm = transform.ValueMap( OrderedDict([ ("fund_issue_org_amac", lambda x: x.strip()), ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")), (FundOrgMapping.org_type.name, (lambda mng_tp, tp: clean_org_type(mng_tp, tp), "manage_type_amac", "type_name_amac")), (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status")) ]) ) jn = transform.Join(inp2, how="left", on="org_id") dn = transform.Dropna(subset=["matched_id", "org_id", FundOrgMapping.org_type.name]) vm2 = transform.ValueMap({ FundOrgMapping.org_type_code.name: (lambda x: 2 if x == "基金管理人" else None, FundOrgMapping.org_type.name), }) sk = transform.MapSelectKeys(cls.FIELDS) s = Stream(inp, transform=(vm, jn, dn, vm2, sk)) return s
def stream_020002(): DATA_SOURCE = "020002" session = dbsession(bind=ENGINE) stmt = session.query(IdMatch).join( DOrgPerson, and_(IdMatch.source_id == DOrgPerson.person_id, IdMatch.data_source == DOrgPerson.data_source, IdMatch.id_type == 3, IdMatch.is_used == 1, IdMatch.data_source == DATA_SOURCE)).join( PersonInfo, and_(IdMatch.matched_id == PersonInfo.person_id, IdMatch.id_type == 3, IdMatch.is_used == 1, IdMatch.data_source == DATA_SOURCE)).with_entities( IdMatch.matched_id.label( OrgPersonMapping.person_id.name), PersonInfo.person_name, DOrgPerson.org_id, DOrgPerson.tenure_date, DOrgPerson.dimission_date, DOrgPerson.is_current, DOrgPerson.duty) inp = MysqlInput(session.bind, stmt) fi = stream_oi(DATA_SOURCE) jn = transform.Join(fi, left_on=DOrgPerson.org_id.name, right_on=IdMatch.source_id.name) sk = transform.MapSelectKeys({ OrgPersonMapping.person_id.name: None, PersonInfo.person_name.name: OrgPersonMapping.person_name.name, IdMatch.matched_id.name: OrgPersonMapping.org_id.name, DOrgPerson.duty.name: OrgPersonMapping.duty.name, OrgInfo.org_name.name: OrgPersonMapping.org_name.name, DOrgPerson.tenure_date.name: OrgPersonMapping.tenure_date.name, DOrgPerson.dimission_date.name: OrgPersonMapping.dimission_date.name, DOrgPerson.is_current.name: OrgPersonMapping.is_current.name }) s = base.Stream(inp, (jn, sk)) return s
def stream_consultant_010004(cls, d_xoi=None): """ 清洗010004源(证券公司)发行产品的投顾公司配对信息; Args: d_xoi: Returns: """ sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, manage_type_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \ "FROM x_fund_info_securities xfi " \ "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_securities GROUP BY fund_id) tb_latest " \ "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \ "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010004' AND im.is_used = 1 " \ "LEFT JOIN base.fund_info fi ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010004'" \ "WHERE fund_name_amac NOT LIKE '%%信托计划'" inp = MysqlInput(ENGINE_C, sql_maxver) inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info") if d_xoi is None: d_xoi = cls._d_oi_consultant() vm = transform.ValueMap( OrderedDict([ ("fund_issue_org_amac", lambda x: x.strip()), ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")), (FundOrgMapping.org_type.name, (lambda x: {"主动管理": "投资顾问"}.get(x), "manage_type_amac")), (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status")) ]) ) jn = transform.Join(inp2, how="left", on="org_id") dn = transform.Dropna(subset=["matched_id", "org_id", FundOrgMapping.org_type.name]) vm2 = transform.ValueMap({ FundOrgMapping.org_type_code.name: (lambda x: 1 if x == "投资顾问" else None, FundOrgMapping.org_type.name), }) dd = transform.DropDuplicate(subset=["matched_id", "org_id", FundOrgMapping.org_type.name]) sk = transform.MapSelectKeys(cls.FIELDS) s = Stream(inp, transform=[vm, jn, dn, vm2, dd, sk]) # 先去空, 避免空值产生, 导致整形列变成浮点型 return s
def stream_manager_010005(cls, d_xoi=None): """ 清洗010005源(期货公司)发行产品的基金管理人配对信息; Args: d_xoi: Returns: """ sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \ "FROM x_fund_info_futures xfi " \ "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_futures GROUP BY fund_id) tb_latest " \ "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \ "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010005' AND im.is_used = 1 " \ "LEFT JOIN base.fund_info fi " \ "ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010005' " \ "WHERE fund_name_amac NOT LIKE '%%信托计划'" inp = MysqlInput(ENGINE_C, sql_maxver) inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info") if d_xoi is None: d_xoi = cls._d_oi_manager() vm = transform.ValueMap(OrderedDict([ ("fund_issue_org_amac", lambda x: x.strip()), ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")), (FundOrgMapping.org_type.name, "基金管理人"), # 该来源的fund_issue_org_amac都是基金管理人; (FundOrgMapping.org_type_code.name, 2), (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status")) ])) jn = transform.Join(inp2, how="left", on="org_id") sk = transform.MapSelectKeys(cls.FIELDS) dn = transform.Dropna( subset=[FundOrgMapping.fund_id.name, FundOrgMapping.org_id.name, FundOrgMapping.org_type_code.name]) s = Stream(inp, transform=(vm, jn, sk, dn,)) return s
def conflu_fund_num(cls): # fund_num, fund_total_num # 外层以org_info表左连接查询, 以确保所有主键都会被更新到, 以处理源表数据变动的情况; sql_operating = "SELECT t1.org_id, t2.fund_num FROM {tb_test} t1 " \ "JOIN (SELECT oi.org_id, COUNT(fom.fund_id) as fund_num FROM {tb_test} oi " \ "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id " \ "JOIN base.fund_info fi ON fom.fund_id = fi.fund_id " \ "WHERE fom.org_type_code = 1 AND fi.fund_status = '运行中'" \ "GROUP BY fom.org_id) t2 " \ "ON t1.org_id = t2.org_id ".format(tb_test=TEST_TABLE) sql_total = "SELECT t1.org_id, t2.fund_total_num FROM {tb_test} t1 " \ "JOIN (SELECT oi.org_id, COUNT(fom.fund_id) as fund_total_num FROM {tb_test} oi " \ "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id " \ "JOIN base.fund_info fi ON fom.fund_id = fi.fund_id " \ "WHERE fom.org_type_code = 1 " \ "GROUP BY fom.org_id) t2 " \ "ON t1.org_id = t2.org_id".format(tb_test=TEST_TABLE) inp = MysqlInput(ENGINE_RD, sql_operating) inp_total = MysqlInput(ENGINE_RD, sql_total) jn = transform.Join(inp_total, how="outer", on="org_id") vm = transform.ValueMap({ "fund_num": lambda x: 0 if np.isnan(x) else x, "fund_total_num": lambda x: 0 if np.isnan(x) else x, }) sk = transform.MapSelectKeys({ "org_id": OrgInfo.org_id.name, "fund_num": OrgInfo.fund_num.name, "fund_total_num": OrgInfo.fund_total_num.name }) s = Stream(inp, [jn, vm, sk]) return Confluence(s)
def stream_020001(): DATA_SOURCE = "020001" session = dbsession(bind=ENGINE) stmt = session.query(IdMatch).join( DOrgPerson, and_(IdMatch.source_id == DOrgPerson.person_id, IdMatch.data_source == DOrgPerson.data_source, IdMatch.id_type == 3, IdMatch.is_used == 1, IdMatch.data_source == DATA_SOURCE)).join( PersonInfo, and_(IdMatch.matched_id == PersonInfo.person_id, IdMatch.id_type == 3, IdMatch.is_used == 1, IdMatch.data_source == DATA_SOURCE)).with_entities( IdMatch.matched_id.label( OrgPersonMapping.person_id.name), PersonInfo.person_name, DOrgPerson.org_id, DOrgPerson.tenure_date, DOrgPerson.dimission_date.name, DOrgPerson.is_current, DOrgPerson.duty) inp = MysqlInput(session.bind, stmt) oi = stream_oi(DATA_SOURCE) def clean_duty_detail(duty_detail): duty_detail = duty_detail.strip() duty_detail = duty_detail.replace("(", "(") duty_detail = duty_detail.replace(")", ")") duty_detail = duty_detail.replace("基金经理,", "") return duty_detail vm1 = transform.ValueMap({ OrgPersonMapping.duty_detail.name: (lambda x: clean_duty_detail(x), DOrgPerson.duty.name) }) vm2 = transform.ValueMap( {OrgPersonMapping.duty.name: lambda x: x if x == "基金经理" else "高管"}) jn = transform.Join(oi, left_on=DOrgPerson.org_id.name, right_on=IdMatch.source_id.name) sk = transform.MapSelectKeys({ OrgPersonMapping.person_id.name: None, PersonInfo.person_name.name: OrgPersonMapping.person_name.name, IdMatch.matched_id.name: OrgPersonMapping.org_id.name, OrgInfo.org_name.name: OrgPersonMapping.org_name.name, OrgPersonMapping.duty.name: None, OrgPersonMapping.duty_detail.name: None, DOrgPerson.tenure_date.name: OrgPersonMapping.tenure_date.name, DOrgPerson.dimission_date.name: OrgPersonMapping.dimission_date.name, DOrgPerson.is_current.name: OrgPersonMapping.is_current.name }) s = base.Stream(inp, (vm1, vm2, jn, sk)) return s
def stream_consultant_010003(cls, d_xoi=None): """ 清洗010003源(私募基金公司)发行产品的投顾公司配对信息; Args: d_xoi: Returns: """ sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, manage_type_amac, type_name_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \ "FROM x_fund_info_private xfi " \ "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_private GROUP BY fund_id) tb_latest " \ "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \ "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010003' AND im.is_used = 1 " \ "LEFT JOIN base.fund_info fi ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010003'" \ "WHERE fund_name_amac NOT LIKE '%%信托计划'" inp = MysqlInput(ENGINE_C, sql_maxver) inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info") def clean_org_type(manage_type, type_name): if manage_type == "顾问管理": if type_name in {"信托计划", "期货公司及其子公司的资产管理计划", "其他", "基金子公司", "基金专户", "证券公司及其子公司的资产管理计划", "银行理财产品", "保险公司及其子公司的资产管理计划", "私募证券投资基金", "私募投资基金"}: return "投资顾问" elif manage_type == "受托管理": if type_name in {"创业投资基金", "其他私募投资基金", "其他投资基金", "股权投资基金"}: return "投资顾问" elif manage_type == "自我管理": if type_name in {"私募证券投资基金", "私募投资基金", "股权投资基金", "创业投资基金", "其他私募投资基金", "其他投资基金"}: return "投资顾问" if d_xoi is None: d_xoi = cls._d_oi_consultant() vm = transform.ValueMap( OrderedDict([ ("fund_issue_org_amac", lambda x: x.strip()), ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")), (FundOrgMapping.org_type.name, (lambda mng_tp, tp: clean_org_type(mng_tp, tp), "manage_type_amac", "type_name_amac")), (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status")) ]) ) jn = transform.Join(inp2, how="left", on="org_id") dn = transform.Dropna(subset=["matched_id", "org_id", FundOrgMapping.org_type.name]) vm2 = transform.ValueMap({ FundOrgMapping.org_type_code.name: (lambda x: 1 if x == "投资顾问" else None, FundOrgMapping.org_type.name), }) dd = transform.DropDuplicate(subset=["matched_id", "org_id", FundOrgMapping.org_type.name]) sk = transform.MapSelectKeys( { "matched_id": FundOrgMapping.fund_id.name, "fund_name": FundOrgMapping.fund_name.name, "org_id": FundOrgMapping.org_id.name, "org_name": FundOrgMapping.org_name.name, FundOrgMapping.org_type_code.name: None, FundOrgMapping.org_type.name: None, } ) s = Stream(inp, transform=(vm, jn, dn, vm2, dd, sk)) return s
stream = Stream(inp.frame, ()) return stream class Confluence: def __init__(self, **streams): self._streams = streams s1 = stream_x_fund_info_account() s1.flow() s2 = stream_x_fund_info_private() s2.flow() s3 = stream_x_fund_info_security() s3.flow() s4 = stream_x_fund_info_future() s4.flow() im = MysqlInput(engine_b, "\ SELECT matched_id, source_id FROM id_match WHERE id_type = 1 AND source = '010001' AND is_used = 1") im.frame j = transform.Join(on="source_id") j.process(s1.frame, im.frame) j.process(s2.frame, im.frame) j.process(s3.frame, im.frame) j.process(s4.frame, im.frame)