def stream_010003_ts104(cls): source_id = "010003" tb = "crawl_private.x_fund_info_private" sql = "SELECT im.matched_id, xfi.type_name_amac, fi.fund_name " \ "FROM {tb} xfi " \ "JOIN (SELECT fund_id, MAX(version) latest_ver FROM {tb} GROUP BY fund_id) t " \ "ON xfi.fund_id = t.fund_id AND xfi.version = t.latest_ver " \ "JOIN (SELECT matched_id, source_id FROM base.id_match WHERE source = '{sid}' AND id_type = 1 AND is_used = 1) im " \ "ON xfi.fund_id = im.source_id " \ "LEFT JOIN fund_info fi ON im.matched_id = fi.fund_id".format( tb=tb, sid=source_id ) inp = MysqlInput(ENGINE, sql) vm = transform.ValueMap( { FundTypeSource.source_id.name: source_id, FundTypeSource.typestandard_code.name: "104", FundTypeSource.typestandard_name.name: "按发行主体分类", FundTypeSource.type_code.name: (lambda x: cls.TS_MAPPER_104.get(x, [None, None])[0], "type_name_amac"), FundTypeSource.type_name.name: (lambda x: cls.TS_MAPPER_104.get(x, [None, None])[1], "type_name_amac") } ) dn = transform.Dropna(subset=[FundTypeSource.fund_id.name, FundTypeSource.type_code.name]) s = base.Stream(inp, [vm, cls.SK, dn]) return s
def stream_010005_ts103(cls): source_id = "010005" tb = "crawl_private.x_fund_info_futures" tmpsql = cls.SQL_103.format(sid=source_id, tb=tb) inp = MysqlInput(ENGINE, tmpsql) vm = transform.ValueMap(OrderedDict( [ [FundTypeSource.type_code.name, (lambda x: cls.TS_MAPPER_103.get(x, [None, None])[0], "fund_type_target_amac")], [FundTypeSource.type_name.name, (lambda x: cls.TS_MAPPER_103.get(x, [None, None])[1], "fund_type_target_amac")], [FundTypeSource.typestandard_code.name, "103"], [FundTypeSource.typestandard_name.name, "按投资标的分类"], [FundTypeSource.source_id.name, source_id] ] )) sk = transform.MapSelectKeys( { "matched_id": FundTypeSource.fund_id.name, "fund_name": FundTypeSource.fund_name.name, FundTypeSource.source_id.name: None, FundTypeSource.typestandard_name.name: None, FundTypeSource.typestandard_code.name: None, FundTypeSource.type_code.name: None, FundTypeSource.type_name.name: None, } ) dn = transform.Dropna(subset=[FundTypeSource.type_code.name]) s = base.Stream(inp, transform=(vm, sk, dn)) return s
def stream_x_org_info(): sql = "SELECT xoi.org_id, xoi.final_report_time, fund_num, fund_scale FROM crawl_private.x_org_info xoi " \ "JOIN (SELECT org_id, MAX(version) latest_ver FROM crawl_private.x_org_info WHERE is_used = 1 GROUP BY org_id) tb " \ "ON xoi.org_id = tb.org_id AND xoi.version = tb.latest_ver" inp = MysqlInput(ENGINE, sql) vm = transform.ValueMap({OrgAssetScale.data_time.name: dt.date.today()}) sk = transform.MapSelectKeys({ "org_id": OrgAssetScale.org_id.name, "final_report_time": OrgAssetScale.statistic_date.name, "fund_num": OrgAssetScale.funds_num.name, "fund_scale": OrgAssetScale.asset_scale.name, OrgAssetScale.data_time.name: None }) dn = transform.Dropna(subset=[ OrgAssetScale.asset_scale.name, OrgAssetScale.funds_num.name, OrgAssetScale.statistic_date.name ], how="all") s = base.Stream(inp, transform=(vm, sk, dn)) return s
def stream_020001(cls, fund_ids): sql = "SELECT im.matched_id, fi.fund_name, im.data_source, dfp.statistic_date, dfp.subject_id, dfp.subject_name, " \ "dfp.scale, dfp.proportion, fas.total_asset " \ "FROM base_public.id_match im " \ "JOIN crawl_public.d_fund_position dfp " \ "ON im.source_id = dfp.fund_id AND im.data_source = dfp.data_source " \ "JOIN base_public.fund_info fi ON im.matched_id = fi.fund_id " \ "left JOIN base_public.fund_asset_scale fas ON dfp.fund_id = fas.fund_id AND dfp.statistic_date = fas.statistic_date " \ "WHERE type = '债券' AND im.matched_id IN {fids} AND im.id_type = 1 AND im.is_used = 1 " \ "AND dfp.fund_id NOT IN (" \ "SELECT DISTINCT fund_id FROM crawl_public.d_fund_position WHERE data_source = '020002')" \ "AND im.data_source = '020001'".format(fids=sf.SQL.values4sql(fund_ids)) inp = MysqlInput(cls.engine, sql) vm = transform.ValueMap({ "proportion": lambda x: float(x.replace("%", "")) / 100, "scale": lambda x: cls._clean_amount(x), }) sk = transform.MapSelectKeys({ "matched_id": "fund_id", "fund_name": None, "data_source": None, "statistic_date": None, "subject_id": None, "subject_name": None, "scale": None, "proportion": "proportion_net", "total_asset": "asset_scale", }) return base.Stream(inp, transform=[vm, sk])
def stream18_60409(cls): tmp_sql = "SELECT fi.fund_id, fi.fund_name " \ "FROM fund_type_source fts " \ "JOIN fund_info fi ON fts.fund_id = fi.fund_id WHERE fi.fund_id NOT IN (" \ "SELECT DISTINCT fund_id FROM base.fund_type_mapping_import WHERE typestandard_code = 604) " \ "AND fi.fund_id IN (" \ "SELECT DISTINCT fund_id FROM fund_type_source WHERE " \ "type_code IN (30410, 30411)) " \ "AND fi.fund_id IN (" \ "SELECT DISTINCT fund_id FROM fund_type_source WHERE " \ "type_code = 40407)" inp = MysqlInput(ENGINE, tmp_sql) vm = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 604, FundTypeMappingImport.typestandard_name.name: "按发行主体分类", FundTypeMappingImport.type_code.name: 60409, FundTypeMappingImport.type_name.name: "单账户", FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) s = base.Stream(inp, [vm, sk]) return s
def stream_x_org_info(): sql = "SELECT xoi.org_id, xoi.final_report_time, real_capital, reg_capital, employee_scale FROM crawl_private.x_org_info xoi " \ "JOIN (SELECT org_id, MAX(version) latest_ver FROM crawl_private.x_org_info WHERE is_used = 1 GROUP BY org_id) tb " \ "ON xoi.org_id = tb.org_id AND xoi.version = tb.latest_ver" inp = MysqlInput(ENGINE, sql) vm = transform.ValueMap( { "employee_scale": lambda x: int(x.replace(",", "")), OrgTimeseries.data_time.name: dt.date.today() } ) sk = transform.MapSelectKeys( { "org_id": OrgTimeseries.org_id.name, "final_report_time": OrgTimeseries.statistic_date.name, "real_capital": OrgTimeseries.real_capital.name, "reg_capital": OrgTimeseries.reg_capital.name, "employee_scale": OrgTimeseries.employee_scale.name, OrgTimeseries.data_time.name: None } ) s = base.Stream(inp, transform=(vm, sk)) return s
def stream_020003(cls, fund_ids): sql = "SELECT im.matched_id, dfp.statistic_date, dfp.subject_id, fpb.data_source, dfp.quantity " \ "FROM base_public.id_match im " \ "JOIN crawl_public.d_fund_position dfp " \ "ON im.source_id = dfp.fund_id AND im.data_source = dfp.data_source " \ "JOIN base_test.fund_position_bond_test_20180515 fpb " \ "ON im.matched_id = fpb.fund_id AND fpb.statistic_date = dfp.statistic_date AND fpb.subject_id = dfp.subject_id " \ "WHERE type = '债券' AND im.matched_id IN {fids} AND im.id_type = 1 AND im.is_used = 1 " \ "AND im.data_source = '020003'".format(fids=sf.SQL.values4sql(fund_ids)) inp = MysqlInput(cls.engine, sql) vm = transform.ValueMap({ "quantity": lambda x: cls._clean_amount(x), }) sk = transform.MapSelectKeys({ "matched_id": "fund_id", "statistic_date": None, "subject_id": None, "quantity": None }) dn = transform.Dropna(subset=["quantity"]) return base.Stream(inp, transform=[vm, sk, dn])
def stream_010005(): sql_fi = "SELECT im.matched_id as fund_id, fi.fund_name, fi.foundation_date, issuing_scale_amac " \ "FROM crawl_private.{tb} xfi " \ "JOIN (SELECT fund_id, MAX(version) latest_ver FROM crawl_private.{tb} GROUP BY fund_id) tb_latest " \ "ON tb_latest.latest_ver = xfi.version AND tb_latest.fund_id = xfi.fund_id " \ "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.source = '010005' AND im.id_type = 1 AND im.is_used = 1 " \ "LEFT JOIN base.fund_info fi ON fi.fund_id = im.matched_id".format(tb="x_fund_info_futures") inp = MysqlInput(ENGINE_C, sql_fi) vm = transform.ValueMap({ "issuing_scale_amac": lambda x: float(x), }) vm2 = transform.ValueMap({ "issuing_scale_amac": lambda x: None if x == 0 else x, }) dn = transform.Dropna( subset=["fund_id", "foundation_date", "issuing_scale_amac"]) sk = transform.MapSelectKeys({ "fund_id": FundAssetScale.fund_id.name, "fund_name": FundAssetScale.fund_name.name, "issuing_scale_amac": FundAssetScale.asset_scale.name, "foundation_date": FundAssetScale.statistic_date.name, }) s = base.Stream(inp, transform=(vm, vm2, dn, sk)) return s
def stream_020001_ts304(cls): source_id = "020001" tb = "crawl_private.d_fund_info" tmpsql = cls.SQL_X04.format(tb=tb, sid=source_id) inp = MysqlInput(ENGINE, tmpsql) vm = transform.ValueMap( OrderedDict([ [FundTypeSource.source_id.name, source_id], [FundTypeSource.typestandard_code.name, "104"], [FundTypeSource.typestandard_name.name, "按发行主体分类"], [ "__cached", (lambda x: cls.TS_MAPPER_304.get(x, [None, None]), "fund_type_issuance") ], [FundTypeSource.type_code.name, (lambda x: x[0], "__cached")], [FundTypeSource.type_name.name, (lambda x: x[1], "__cached")], ])) sk = transform.MapSelectKeys({ "matched_id": FundTypeSource.fund_id.name, "fund_name": FundTypeSource.fund_name.name, FundTypeSource.source_id.name: None, FundTypeSource.typestandard_code.name: None, FundTypeSource.typestandard_name.name: None, FundTypeSource.type_code.name: None, FundTypeSource.type_name.name: None, }) dn = transform.Dropna(subset=[FundTypeSource.type_code.name]) s = base.Stream(inp, [vm, sk, dn]) return s
def stream_602_4(cls, c_cleaned): sql = "SELECT fi.fund_id, fi.fund_name " \ "FROM base.fund_info fi " \ "WHERE fi.fund_id NOT IN (SELECT DISTINCT fund_id FROM base.fund_type_mapping_import WHERE typestandard_code = 602) " inp = MysqlInput(ENGINE, sql) # 非惰性求值 fids = set(c_cleaned.dataframe["fund_id"]) vm0 = transform.ValueMap({ "__tmp": (lambda x: (60201, "非结构化", None, None) if x not in fids else (None, None, None, None), "fund_id") }) vm1 = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 602, FundTypeMappingImport.typestandard_name.name: "按结构类型分类", FundTypeMappingImport.type_code.name: (lambda x: x[0], "__tmp"), FundTypeMappingImport.type_name.name: (lambda x: x[1], "__tmp"), FundTypeMappingImport.stype_code.name: (lambda x: x[2], "__tmp"), FundTypeMappingImport.stype_name.name: (lambda x: x[3], "__tmp"), FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) dn = transform.Dropna(subset=[FundTypeMappingImport.type_code.name, FundTypeMappingImport.stype_code.name], how="all") s = base.Stream(inp, [vm0, vm1, sk, dn]) return s
def stream_020001_op(): sql = "SELECT im.matched_id, person_name FROM crawl_private.d_org_person tb_main " \ "JOIN (SELECT person_id, MAX(version) latest_ver FROM crawl_private.d_org_person GROUP BY person_id) tb_latest " \ "ON tb_main.version = tb_latest.latest_ver AND tb_main.person_id = tb_latest.person_id " \ "JOIN base.id_match im ON im.source_id = tb_main.person_id " \ "AND im.id_type = 3 AND im.source = '020001' AND im.is_used = 1 " inp = MysqlInput(ENGINE, sql) vm = transform.ValueMap( OrderedDict([ [ PersonInfo.person_name_py.name, (lambda x: "".join( [x[0] for x in py(x, style=py_style.FIRST_LETTER)]).upper(), "person_name") ], ])) sk = transform.MapSelectKeys({ "matched_id": PersonInfo.person_id.name, "person_name": PersonInfo.person_name.name, PersonInfo.person_name_py.name: None, }) s = base.Stream(inp, transform=(vm, sk)) return s
def stream_010001(cls): SOURCE = "010001" sql = "SELECT im.matched_id, pi.person_name, im_org.org_id_matched as org_id, oi.org_name, duty " \ "FROM crawl_private.x_org_executive_info tb_main " \ "JOIN (SELECT person_id, MAX(version) latest_ver FROM crawl_private.x_org_executive_info GROUP BY person_id) tb_latest " \ "ON tb_main.version = tb_latest.latest_ver AND tb_main.person_id = tb_latest.person_id " \ "JOIN base.id_match im " \ "ON im.source_id = tb_main.person_id AND im.id_type = 3 AND im.source = '{sid}' AND im.is_used = 1 " \ "LEFT JOIN base.person_info pi " \ "ON im.matched_id = pi.person_id " \ "JOIN (SELECT matched_id as org_id_matched, source_id as org_id_source FROM base.id_match " \ "WHERE id_type = 2 AND source = '{sid}' AND is_used = 1) im_org " \ "ON im_org.org_id_source = tb_main.org_id " \ "LEFT JOIN base.org_info oi " \ "ON im_org.org_id_matched = oi.org_id".format(sid=SOURCE) inp = MysqlInput(ENGINE, sql) DUTY_ORD = { "执行事务合伙人(委派代表)": 0, "合伙人": 1, "法定代表人": 2, "董事长": 3, "总经理": 4, "董事总经理": 5, "执行董事": 6, "副总经理": 7, "监事": 8, "合规风控": 9, "信息填报负责人": 10, "其他": 11 } vm = transform.ValueMap( OrderedDict([[ OrgPersonMapping.duty_detail.name, (lambda x: ",".join( sorted(x.split(","), key=lambda d: DUTY_ORD.get(d, 10000)) ), "duty") ], [OrgPersonMapping.duty.name, "高管"], [OrgPersonMapping.is_current.name, 1]])) sk = transform.MapSelectKeys({ "matched_id": OrgPersonMapping.person_id.name, PersonInfo.person_name.name: None, "org_id": OrgPersonMapping.org_id.name, "org_name": OrgPersonMapping.org_name.name, OrgPersonMapping.duty.name: None, OrgPersonMapping.duty_detail.name: None, OrgPersonMapping.is_current.name: None }) s = base.Stream(inp, transform=(vm, sk)) return s
def stream_010101(cls): sql = "SELECT im.matched_id as person_id, pi.person_name, xpc.status, xpc.org_name as org_name_ " \ "FROM crawl_private.x_person_certificate_010101 xpc " \ "JOIN base.id_match im ON xpc.person_id = im.source_id " \ "LEFT JOIN base.person_info pi ON im.matched_id = pi.person_id " \ "WHERE im.source = '010101' AND im.id_type = 3 AND im.is_used = 1" def org_name_dict(): import pandas as pd sql_oi = "SELECT org_full_name, org_id, org_name FROM base.org_info" res = pd.read_sql(sql_oi, ENGINE) v = tuple([(oid, oname) for oid, oname in zip(res["org_id"], res["org_name"])]) res = dict(zip(res["org_full_name"], v)) return res inp = MysqlInput(ENGINE, sql) d = org_name_dict() vm = transform.ValueMap({ OrgPersonMapping.org_id.name: (lambda x: d.get(x)[0], "org_name_"), OrgPersonMapping.org_name.name: (lambda x: d.get(x)[1], "org_name_"), OrgPersonMapping.is_current.name: (lambda x: { "正常": 1, "离职": 0 }.get(x), "status"), OrgPersonMapping.duty.name: None }) sk = transform.MapSelectKeys({ "person_id": OrgPersonMapping.person_id.name, "person_name": OrgPersonMapping.person_name.name, OrgPersonMapping.org_id.name: None, "org_name": OrgPersonMapping.org_name.name, OrgPersonMapping.is_current.name: None, OrgPersonMapping.duty.name: None }) dn = transform.Dropna(subset=[ OrgPersonMapping.person_id.name, OrgPersonMapping.org_id.name ]) s = base.Stream(inp, [vm, sk, dn]) return s
def stream_020002_ts401(cls): source_id = "020002" tb = "crawl_private.d_fund_info" tmpsql = cls.SQL_X01.format(tb=tb, sid=source_id) inp = MysqlInput(ENGINE, tmpsql) vm0 = transform.ValueMap({ "type_name": lambda x: "" if x.replace("-", "") == "" else x, "stype_name": lambda x: "" if x.replace("-", "") == "" else x, }) vm = transform.ValueMap( OrderedDict([ [FundTypeSource.source_id.name, source_id], [FundTypeSource.typestandard_code.name, "101"], [FundTypeSource.typestandard_name.name, "按投资策略分类"], [ "__cached", (lambda tn, stn: cls.TS_MAPPER_401.get( (tn, stn), [None] * 4), "type_name", "stype_name") ], [FundTypeSource.type_code.name, (lambda x: x[0], "__cached")], [FundTypeSource.type_name.name, (lambda x: x[2], "__cached")], [FundTypeSource.stype_code.name, (lambda x: x[1], "__cached")], [FundTypeSource.stype_name.name, (lambda x: x[3], "__cached")], ])) sk = transform.MapSelectKeys({ "matched_id": FundTypeSource.fund_id.name, "fund_name": FundTypeSource.fund_name.name, FundTypeSource.source_id.name: None, FundTypeSource.typestandard_code.name: None, FundTypeSource.typestandard_name.name: None, FundTypeSource.type_code.name: None, FundTypeSource.type_name.name: None, FundTypeSource.stype_code.name: None, FundTypeSource.stype_name.name: None, }) dn = transform.Dropna(subset=[ FundTypeSource.type_code.name, FundTypeSource.stype_code.name ], how="all") s = base.Stream(inp, [vm0, vm, sk, dn]) return s
def stream_010001(): import re sql = "SELECT im.matched_id, `name`, qualifying_way FROM crawl_private.x_org_executive_info tb_main " \ "JOIN (SELECT person_id, MAX(version) latest_ver FROM crawl_private.x_org_executive_info GROUP BY person_id) tb_latest " \ "ON tb_main.version = tb_latest.latest_ver AND tb_main.person_id = tb_latest.person_id " \ "JOIN base.id_match im ON im.source_id = tb_main.person_id " \ "AND im.id_type = 3 AND im.source = '010001' AND im.is_used = 1 " inp = MysqlInput(ENGINE, sql) vm = transform.ValueMap( OrderedDict([ [ PersonInfo.person_name.name, (lambda x: re.sub("(.*)", "", x), "name") ], [ PersonInfo.person_name_py.name, (lambda x: "".join( [x[0] for x in py(x, style=py_style.FIRST_LETTER)]).upper(), PersonInfo.person_name.name) ], [ PersonInfo.fund_qualification_way.name, (lambda x: { "通过考试": "通过考试", "资格认定": "资格认定" }.get(x), "qualifying_way") ], [ PersonInfo.is_fund_qualification.name, (lambda x: int(bool(x)), PersonInfo.fund_qualification_way.name) ], ])) sk = transform.MapSelectKeys({ "matched_id": PersonInfo.person_id.name, PersonInfo.person_name.name: None, PersonInfo.person_name_py.name: None, PersonInfo.fund_qualification_way.name: None, PersonInfo.is_fund_qualification.name: None }) s = base.Stream(inp, transform=(vm, sk)) return s
def stream_020002(): DATA_SOURCE = "020002" session = dbsession(bind=ENGINE) stmt = session.query(IdMatch).join( DOrgPerson, and_(IdMatch.source_id == DOrgPerson.person_id, IdMatch.data_source == DOrgPerson.data_source, IdMatch.id_type == 3, IdMatch.is_used == 1, IdMatch.data_source == DATA_SOURCE)).join( PersonInfo, and_(IdMatch.matched_id == PersonInfo.person_id, IdMatch.id_type == 3, IdMatch.is_used == 1, IdMatch.data_source == DATA_SOURCE)).with_entities( IdMatch.matched_id.label( OrgPersonMapping.person_id.name), PersonInfo.person_name, DOrgPerson.org_id, DOrgPerson.tenure_date, DOrgPerson.dimission_date, DOrgPerson.is_current, DOrgPerson.duty) inp = MysqlInput(session.bind, stmt) fi = stream_oi(DATA_SOURCE) jn = transform.Join(fi, left_on=DOrgPerson.org_id.name, right_on=IdMatch.source_id.name) sk = transform.MapSelectKeys({ OrgPersonMapping.person_id.name: None, PersonInfo.person_name.name: OrgPersonMapping.person_name.name, IdMatch.matched_id.name: OrgPersonMapping.org_id.name, DOrgPerson.duty.name: OrgPersonMapping.duty.name, OrgInfo.org_name.name: OrgPersonMapping.org_name.name, DOrgPerson.tenure_date.name: OrgPersonMapping.tenure_date.name, DOrgPerson.dimission_date.name: OrgPersonMapping.dimission_date.name, DOrgPerson.is_current.name: OrgPersonMapping.is_current.name }) s = base.Stream(inp, (jn, sk)) return s
def stream4_60504(cls): tc = (10403,) tmp_sql = cls.SQL_BASE1.format(tc=sf.SQL.values4sql(tc)) inp = MysqlInput(ENGINE, tmp_sql) vm = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 605, FundTypeMappingImport.typestandard_name.name: "按基金类型分类", FundTypeMappingImport.type_code.name: 60504, FundTypeMappingImport.type_name.name: "其他投资基金", FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) s = base.Stream(inp, [vm, sk]) return s
def stream_020002(): sql = "SELECT im.matched_id, person_name, background FROM crawl_private.d_person_info tb_main " \ "JOIN (SELECT person_id, MAX(version) latest_ver FROM crawl_private.d_person_info GROUP BY person_id) tb_latest " \ "ON tb_main.version = tb_latest.latest_ver AND tb_main.person_id = tb_latest.person_id " \ "JOIN base.id_match im ON im.source_id = tb_main.person_id " \ "AND im.id_type = 3 AND im.source = '020002' AND im.is_used = 1 " inp = MysqlInput(ENGINE, sql) inp.dataframe inp.dataframe["background"].drop_duplicates().tolist() vm = transform.ValueMap( OrderedDict([ [PersonInfo.person_name.name, lambda x: x.strip()], [ PersonInfo.person_name_py.name, (lambda x: "".join( [x[0] for x in py(x, style=py_style.FIRST_LETTER)]).upper(), "person_name") ], [ PersonInfo.background.name, (lambda x: { '公募': '公募', '其它': "其他", '券商': "券商", '海外': "海外", "民间": "民间", "学者": "学者", '实业': "实业", '保险': "保险", '媒体': "媒体", '期货': '期货' }.get(x, "其他"), "background") ], ])) sk = transform.MapSelectKeys({ "matched_id": PersonInfo.person_id.name, "person_name": PersonInfo.person_name.name, PersonInfo.person_name_py.name: None, PersonInfo.background.name: None, }) s = base.Stream(inp, transform=(vm, sk)) return s
def stream_602_2(cls): sql = "SELECT fi.fund_id, fi.fund_name, fi.fund_full_name " \ "FROM base.fund_info fi " \ "WHERE fi.fund_id NOT IN (SELECT DISTINCT fund_id FROM base.fund_type_mapping_import WHERE typestandard_code = 602) " inp = MysqlInput(ENGINE, sql) def clean_tc(fund_full_name): patt = "优先级|(?<!\w)A级|劣后级|进取级|B级|普通级|次级|风险级|中间级|管理级|结构化|分级" # A级前面不为数字和字母 matched = set(re.findall(patt, fund_full_name)) if ("优先级" in matched) or ("A级" in matched): return 60202, "结构化", 6020202, "优先级" elif ("劣后级" in matched) or ("进取级" in matched)\ or ("B级" in matched) or ("普通级" in matched)\ or ("次级" in matched) or ("风险级" in matched): return 60202, "结构化", 6020203, "进取级/劣后级" elif "中间级" in matched: return 60202, "结构化", 6020204, "中间级(夹层)" elif "管理级" in matched: return 60202, "结构化", 6020205, "管理型份额" elif ("结构化" in matched) or ("分级" in matched): return 60202, "结构化", None, None return None, None, None, None vm0 = transform.ValueMap({ "__tmp": (lambda x: clean_tc(x), "fund_full_name") }) vm = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 602, FundTypeMappingImport.typestandard_name.name: "按结构类型分类", FundTypeMappingImport.type_code.name: (lambda x: x[0], "__tmp"), FundTypeMappingImport.type_name.name: (lambda x: x[1], "__tmp"), FundTypeMappingImport.stype_code.name: (lambda x: x[2], "__tmp"), FundTypeMappingImport.stype_name.name: (lambda x: x[3], "__tmp"), FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) dn = transform.Dropna(subset=[FundTypeMappingImport.type_code.name, FundTypeMappingImport.stype_code.name], how="all") s = base.Stream(inp, [vm0, vm, sk, dn]) return s
def stream19_60410(cls): tc = (30405, 40499,) tmp_sql = cls.SQL_BASE2.format(tc=sf.SQL.values4sql(tc), n=len(tc)) inp = MysqlInput(ENGINE, tmp_sql) vm = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 604, FundTypeMappingImport.typestandard_name.name: "按发行主体分类", FundTypeMappingImport.type_code.name: 60410, FundTypeMappingImport.type_name.name: "其他", FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) s = base.Stream(inp, [vm, sk]) return s
def stream_010005_ts104(cls): source_id = "010005" tb = "crawl_private.x_fund_info_futures" tmpsql = cls.SQL_104.format(tb=tb, sid=source_id) inp = MysqlInput(ENGINE, tmpsql) vm = transform.ValueMap( { FundTypeSource.source_id.name: source_id, FundTypeSource.typestandard_code.name: "104", FundTypeSource.typestandard_name.name: "按发行主体分类", FundTypeSource.type_code.name: cls.TS_MAPPER_104["期货公司及其子公司的资产管理计划"][0], FundTypeSource.type_name.name: cls.TS_MAPPER_104["期货公司及其子公司的资产管理计划"][1] } ) s = base.Stream(inp, [vm, cls.SK]) return s
def stream8_60401(cls): sql = "SELECT fund_id, fund_name FROM base.fund_info " \ "WHERE fund_full_name LIKE '%%信托计划' " \ "AND fund_id NOT IN (SELECT DISTINCT fund_id FROM base.fund_type_mapping_import WHERE typestandard_code = 604)" inp = MysqlInput(ENGINE, sql) vm = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 604, FundTypeMappingImport.typestandard_name.name: "按发行主体分类", FundTypeMappingImport.type_code.name: 60401, FundTypeMappingImport.type_name.name: "信托", FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) s = base.Stream(inp, [vm, sk]) return s
def stream9_60310(cls): tmp_sql = "SELECT fund_id, fund_name FROM base.fund_info " \ "WHERE fund_id NOT IN (" \ "SELECT DISTINCT fund_id FROM base.fund_type_mapping_import WHERE typestandard_code = 603) " \ "AND fund_full_name LIKE '%%新三板%%'" inp = MysqlInput(ENGINE, tmp_sql) vm = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 603, FundTypeMappingImport.typestandard_name.name: "按投资标的分类", FundTypeMappingImport.type_code.name: 60310, FundTypeMappingImport.type_name.name: "新三板", FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) s = base.Stream(inp, [vm, sk]) return s
def stream_op_020002(cls): SOURCE = "020002" sql = "SELECT im.matched_id, pi.person_name, im_org.org_id_matched as org_id, oi.org_name, duty " \ "FROM crawl_private.d_org_person tb_main " \ "JOIN (SELECT person_id, MAX(version) latest_ver FROM crawl_private.d_org_person GROUP BY person_id) tb_latest " \ "ON tb_main.version = tb_latest.latest_ver AND tb_main.person_id = tb_latest.person_id " \ "JOIN base.id_match im " \ "ON im.source_id = tb_main.person_id AND im.id_type = 3 AND im.source = '{sid}' AND im.is_used = 1 " \ "LEFT JOIN base.person_info pi " \ "ON im.matched_id = pi.person_id " \ "JOIN (SELECT matched_id as org_id_matched, source_id as org_id_source FROM base.id_match " \ "WHERE id_type = 2 AND source = '{sid}' AND is_used = 1) im_org " \ "ON im_org.org_id_source = tb_main.org_id " \ "LEFT JOIN base.org_info oi " \ "ON im_org.org_id_matched = oi.org_id".format(sid=SOURCE) inp = MysqlInput(ENGINE, sql) vm = transform.ValueMap( OrderedDict( [[OrgPersonMapping.duty_detail.name, (lambda x: x, "duty")], [ OrgPersonMapping.duty.name, (lambda x: { "基金经理": "基金经理" }.get(x, "其他"), "duty") ], [OrgPersonMapping.is_current.name, 1]])) sk = transform.MapSelectKeys({ "matched_id": OrgPersonMapping.person_id.name, PersonInfo.person_name.name: None, "org_id": OrgPersonMapping.org_id.name, "org_name": OrgPersonMapping.org_name.name, OrgPersonMapping.duty.name: None, OrgPersonMapping.duty_detail.name: None, OrgPersonMapping.is_current.name: None }) s = base.Stream(inp, transform=(vm, sk)) return s
def _stream_02_constructor(cls, source_id): SQL_02 = "SELECT im_p.matched_id as person_id, im_f.matched_id as fund_id, dpf.source_id, " \ "fi.foundation_date, fi.end_date, fi.fund_status, " \ "pi.person_name, fi.fund_name, dpf.is_current " \ "FROM {tb} dpf " \ "JOIN (SELECT person_id, MAX(version) latest_ver FROM {tb} GROUP BY person_id) t " \ "ON dpf.person_id = t.person_id AND dpf.version = t.latest_ver " \ "JOIN (SELECT matched_id, source_id, source FROM base.id_match WHERE is_used = 1 AND id_type = 3) im_p " \ "ON im_p.source_id = dpf.person_id AND im_p.source = dpf.source_id " \ "JOIN (SELECT matched_id, source_id, source FROM base.id_match WHERE is_used = 1 AND id_type = 1) im_f " \ "ON im_f.source_id = dpf.fund_id AND im_f.source = dpf.source_id " \ "LEFT JOIN base.person_info pi ON im_p.matched_id = pi.person_id " \ "LEFT JOIN base.fund_info fi ON im_f.matched_id = fi.fund_id " \ "WHERE dpf.source_id = {sid} AND dpf.is_used = 1 ".format(tb=source_id) table = "crawl_private.d_person_fund" tmp_sql = SQL_02.format(tb=table, sid=source_id) inp = MysqlInput(ENGINE_RD, tmp_sql) s = base.Stream(inp, transform=[]) return s
def stream_010002_ts102(cls): source_id = "010002" tb = "crawl_private.x_fund_info_fundaccount" tmpsql = cls.SQL_102.format(tb=tb, sid=source_id) inp = MysqlInput(ENGINE, tmpsql) vm = transform.ValueMap( { FundTypeSource.source_id.name: source_id, FundTypeSource.typestandard_code.name: "102", FundTypeSource.typestandard_name.name: "按结构类型分类", FundTypeSource.type_code.name: (lambda x: cls.TS_MAPPER_102.get(x, [None, None])[0], "fund_type_allocation_amac"), FundTypeSource.type_name.name: (lambda x: cls.TS_MAPPER_102.get(x, [None, None])[1], "fund_type_allocation_amac"), } ) dn = transform.Dropna(subset=[FundTypeSource.fund_id.name, FundTypeSource.type_code.name]) s = base.Stream(inp, [vm, cls.SK, dn]) return s
def stream_020003(): session = dbsession(bind=ENGINE) query_oh = session.query(IdMatch).join( DFundInfo, and_(IdMatch.id_type == 1, IdMatch.source_id == DFundInfo.fund_id, IdMatch.data_source == DFundInfo.data_source, IdMatch.data_source == "020003") ).outerjoin( FundInfo, and_(IdMatch.id_type == 1, IdMatch.matched_id == FundInfo.fund_id) ).filter( IdMatch.is_used == 1 ).with_entities( FundInfo.fund_id, FundInfo.fund_name, IdMatch.data_source, DFundInfo.fund_type ) inp = MysqlInput(session.bind, query_oh) vm = transform.ValueMap( OrderedDict([ [FundTypeMappingSource.type_code.name, (lambda ds, tp: etl.EnumMap.DFundInfo.type_code4.get((ds, tp)), DFundInfo.data_source.name, DFundInfo.fund_type.name)], [FundTypeMappingSource.type_name.name, (lambda tc: etl.Enum.TypeMapping.type_name.get(tc), FundTypeMappingSource.type_code.name)], [FundTypeMappingSource.typestandard_code.name, (lambda tc: tc[:-2], FundTypeMappingSource.type_code.name)], [FundTypeMappingSource.typestandard_name.name, (lambda tsc: etl.Enum.TypeMapping.typestandard_name.get(tsc), FundTypeMappingSource.typestandard_code.name)] ]) ) dn = transform.Dropna(subset=[FundInfo.fund_id.name, FundTypeMappingSource.typestandard_code.name]) sk = transform.MapSelectKeys( { FundInfo.fund_id.name: None, FundInfo.fund_name.name: None, IdMatch.data_source.name: FundTypeMappingSource.data_source.name, FundTypeMappingSource.typestandard_code.name: None, FundTypeMappingSource.typestandard_name.name: None, FundTypeMappingSource.type_code.name: None, FundTypeMappingSource.type_name.name: None, } ) s = base.Stream(inp, transform=(vm, dn, sk)) return s
def stream_020002(cls): sql = cls.BASE_SQL.format(sid='020002', upt=cls.UPT_SINCE) inp = MysqlInput(cls.engine, sql) vm = transform.ValueMap({ "total_share": lambda x: cls.parse(x), "total_asset": lambda x: cls.parse(x), "purchase_amount": lambda x: cls.parse(x), "redemption_amount": lambda x: cls.parse(x), }) sk = transform.MapSelectKeys({ "fund_id": None, "fund_name": None, "statistic_date": None, "total_share": None, "total_asset": None, "purchase_amount": None, "redemption_amount": None, }) s = base.Stream(inp, transform=[vm, sk]) return s
def stream_602_3(cls): sql = "SELECT fi.fund_id, fi.fund_name, fts.type_code FROM fund_info fi " \ "JOIN fund_type_source fts ON fi.fund_id = fts.fund_id " \ "WHERE fi.fund_id NOT IN (" \ "SELECT DISTINCT fund_id FROM base.fund_type_mapping_import WHERE typestandard_code = 602)" \ "AND fts.type_code IN (10201, 10202, 30201, 30202)" inp = MysqlInput(ENGINE, sql) structured, unstructured = (60202, "结构化", None, None), (60201, "非结构化", None, None) d = { 10201: structured, 10202: unstructured, 30201: structured, 30202: unstructured } vm0 = transform.ValueMap({ "__tmp": (lambda x: d.get(x, (None, None, None, None)), "type_code") }) vm = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 602, FundTypeMappingImport.typestandard_name.name: "按结构类型分类", FundTypeMappingImport.type_code.name: (lambda x: x[0], "__tmp"), FundTypeMappingImport.type_name.name: (lambda x: x[1], "__tmp"), FundTypeMappingImport.stype_code.name: (lambda x: x[2], "__tmp"), FundTypeMappingImport.stype_name.name: (lambda x: x[3], "__tmp"), FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) dn = transform.Dropna(subset=[FundTypeMappingImport.type_code.name, FundTypeMappingImport.stype_code.name], how="all") s = base.Stream(inp, [vm0, vm, sk, dn]) return s
def stream_602_1(cls): sql = "SELECT fi.fund_id, fi.fund_name, fi.fund_full_name, fp.portfolio_type " \ "FROM base.fund_info fi " \ "JOIN base.fund_portfolio fp ON fi.fund_id = fp.fund_id " \ "WHERE fi.fund_id NOT IN (SELECT DISTINCT fund_id FROM base.fund_type_mapping_import WHERE typestandard_code = 602) " \ "AND fp.portfolio_type = 1" inp = MysqlInput(ENGINE, sql) vm = transform.ValueMap({ FundTypeMappingImport.typestandard_code.name: 602, FundTypeMappingImport.typestandard_name.name: "按结构类型分类", FundTypeMappingImport.type_code.name: 60202, FundTypeMappingImport.type_name.name: "结构化", FundTypeMappingImport.stype_code.name: 6020201, FundTypeMappingImport.stype_name.name: "基础份额", FundTypeMappingImport.classified_by.name: CLASSIFIED_BY }) sk = transform.MapSelectKeys(cls.FIXED_FIELDS) s = base.Stream(inp, [vm, sk]) return s