Exemple #1
0
    def stream_consultant_010004(cls, d_xoi=None):
        """
            清洗010004源(证券公司)发行产品的投顾公司配对信息;

        Args:
            d_xoi:

        Returns:

        """

        sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, manage_type_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \
                     "FROM x_fund_info_securities xfi " \
                     "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_securities GROUP BY fund_id) tb_latest " \
                     "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \
                     "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010004' AND im.is_used = 1 " \
                     "LEFT JOIN base.fund_info fi ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010004'" \
                     "WHERE fund_name_amac NOT LIKE '%%信托计划'"

        inp = MysqlInput(ENGINE_C, sql_maxver)

        inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info")

        if d_xoi is None:
            d_xoi = cls._d_oi_consultant()

        vm = transform.ValueMap(
            OrderedDict([
                ("fund_issue_org_amac", lambda x: x.strip()),
                ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")),
                (FundOrgMapping.org_type.name, (lambda x: {"主动管理": "投资顾问"}.get(x), "manage_type_amac")),
                (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status"))
            ])
        )

        jn = transform.Join(inp2, how="left", on="org_id")

        dn = transform.Dropna(subset=["matched_id", "org_id", FundOrgMapping.org_type.name])

        vm2 = transform.ValueMap({
            FundOrgMapping.org_type_code.name: (lambda x: 1 if x == "投资顾问" else None, FundOrgMapping.org_type.name),
        })

        dd = transform.DropDuplicate(subset=["matched_id", "org_id", FundOrgMapping.org_type.name])

        sk = transform.MapSelectKeys(cls.FIELDS)
        s = Stream(inp, transform=[vm, jn, dn, vm2, dd, sk])  # 先去空, 避免空值产生, 导致整形列变成浮点型
        return s
Exemple #2
0
    def stream_2(cls):
        """
            清洗 d_org_info;
        """

        sql = "select INBBM,IGSDM,GPDM from usrZQZB"

        inp = pd.read_sql(sql, engine2)

        sk = transform.MapSelectKeys({
            "INBBM": "INBBM",
            'IGSDM': 'IGSDM',
            'GPDM': 'ZQDM'
        })
        s = Stream(inp, transform=[sk])
        return s
Exemple #3
0
def stream_d_fund_info_020001():
    def clean_regcode(x):
        patt = "\D\w\d{4}"
        sre = re.search(patt, x)
        if sre is not None:
            return sre.group()
        return None

    def clean_locktime(x):
        patt = "\D\w\d{4}"
        sre = re.search(patt, x)
        if sre is not None:
            return sre.group()
        return None

    inp = MysqlInput(engine_c, "\
    SELECT TB_MAIN.version, TB_MAIN.fund_id, TB_MAIN.source_id, fund_name, fund_full_name, reg_code, fund_status, locked_time_limit \
    open_date, \
    FROM d_fund_info TB_MAIN \
    JOIN (SELECT fund_id, MAX(version) latest_ver, source_id FROM d_fund_info WHERE source_id = '020001' and is_used = 1 GROUP BY fund_id, source_id) as TB_LATEST \
    ON TB_MAIN.version = TB_LATEST.latest_ver AND TB_MAIN.fund_id = TB_LATEST.fund_id AND TB_MAIN.source_id = TB_LATEST.source_id")

    vm = transform.ValueMap(
        {
            "fund_name": lambda x: remove_blank(x),
            "fund_full_name": lambda x: remove_blank(x),
            "reg_code": lambda x: lambda x: clean_regcode(x),
            "locked_time_limit": lambda x: transform.CleanWrong("-|--"),
            "fund_status": {"正常": "运行中", "终止": "终止"}
        }
    )

    km = transform.MapSelectKeys(
        {
            # "version": None,
            "fund_id": "source_id",
            "fund_name": None,
            "fund_full_name": None,
            "reg_code": None,
            "fund_status": None,
            "issuing_scale_amac": "issuing_scale",
            "number_clients_amac": "number_clients",
        }
    )

    stream = Stream(inp.frame, ())
    return stream
Exemple #4
0
def stream_010001():
    session = dbsession(bind=engine_r)
    stmt = session.query(DBondInfo).join(
        BondInfo, DBondInfo.bond_id == BondInfo.bond_id).filter(
            DBondInfo.source_id == "010001").with_entities(
                DBondInfo.bond_id, DBondInfo.source_id,
                DBondInfo.bond_full_name, DBondInfo.interest_type,
                DBondInfo.interest_freq)
    inp = MysqlInput(session.bind, stmt)

    def clean_interest_freq(type_, freq, fname):
        if fname.find("贴现") >= 0:
            return "贴现"

        if type_ in {"零息", "贴现"}:
            return "贴现"
        elif type_ == "一次还本息":
            return "到期一次还本付息"
        else:

            d_freq = {
                "到期一次还本付息":
                "到期一次还本付息",  # 除去type_为"零息"的情况, 其余freq为"到期一次还本付息"映射为"到期一次还本付息"
                "按年付息": "年付息",
                "按半年付息": "半年付息",
                "按季度付息": "季度付息",
                "按月付息": "月付息",
            }
            return d_freq.get(freq)

    vm = transform.ValueMap({
        BondInfo.interest_freq.name:
        (lambda type_, freq, fname: clean_interest_freq(type_, freq, fname),
         DBondInfo.interest_type.name, DBondInfo.interest_freq.name,
         DBondInfo.bond_full_name.name)
    })

    dn = transform.Dropna(subset=[BondInfo.interest_freq.name])

    km = transform.MapSelectKeys({
        DBondInfo.bond_id.name: BondInfo.bond_id.name,
        BondInfo.interest_freq.name: None,
    })

    s = Stream(inp, (vm, dn, km))

    return s
Exemple #5
0
    def stream_000001(cls):
        sql = "SELECT fund_id FROM base.fund_info"
        inp = MysqlNativeInput(engine_base, sql)
        vm = transform.ValueMap({
            "region": (lambda x: region.get(x), "fund_id"),

        })

        km = transform.MapSelectKeys(
            {
                "fund_id": "fund_id",
                "region": "region"
            }
        )

        stream = Stream(inp, transform=[vm, km])
        return stream
Exemple #6
0
    def stream_000001(cls):
        sql = "SELECT fund_id FROM base.fund_info"
        inp = MysqlNativeInput(engine_base, sql)
        vm = transform.ValueMap({
            "init_nav": (lambda x: init_nav.get(x), "fund_id"),
        })

        vm2 = transform.ValueMap(
            {"init_nav": lambda x: 1 if np.isnan(x) else x})

        km = transform.MapSelectKeys({
            "fund_id": "fund_id",
            "init_nav": "init_nav"
        })

        stream = Stream(inp, transform=[vm, vm2, km])
        return stream
Exemple #7
0
    def stream_020003(cls):
        """
            清洗 fund_custodian;

        """
        sql = "SELECT idh.matched_id, ff.fund_custodian, fi.fund_name FROM ( \
                SELECT matched_id,source_id FROM base_public.id_match \
                where id_type = 1 AND is_used = 1 AND data_source = '020003') as idh \
                JOIN \
                (SELECT * FROM ( \
                SELECT MAX(version) as mm, fund_id as id FROM crawl_public.d_fund_info GROUP BY fund_id) as idm \
                JOIN crawl_public.d_fund_info as df \
                ON idm.id = df.fund_id AND idm.mm = df.version \
                WHERE df.data_source = '020003') AS ff \
                ON ff.id = idh.matched_id \
                JOIN base_public.fund_info as fi \
                ON fi.fund_id = idh.matched_id \
                where idh.matched_id <> '777777'"

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            "org_id": (lambda x: cls.org_full_name.get(x), "fund_custodian"),
        })

        vm2 = transform.ValueMap({
            "type_code": (lambda x: cls.type_code.get(x), "org_id"),
        })

        vm3 = transform.ValueMap({
            "type_name": (lambda x: cls.type_name.get(x), "type_code"),
        })

        dr = transform.Dropna(axis=0, how="any")

        sk = transform.MapSelectKeys({
            "matched_id": "fund_id",
            "org_id": "org_id",
            "fund_custodian": "org_name",
            "fund_name": "fund_name",
            "type_code": "type_code",
            "type_name": "type_name"
        })

        s = Stream(inp, transform=[vm, vm2, vm3, dr, sk])
        return s
Exemple #8
0
def dfundnv2src(fund_id=None, source_id=None, update_time=None):
    def cal_added_by_nv_and_adjnv(added_nv, nv, adj_nv, sid):
        if sid != "020003":
            if added_nv != 0:
                return added_nv
            else:
                return None
        else:
            if nv == adj_nv:
                return nv
            else:
                return None

    session = dbsession(bind=engine_r)
    stmt = session.query(DFundNv).join(
        IdMatch, and_(IdMatch.source_id == DFundNv.fund_id, IdMatch.source == DFundNv.source_id, IdMatch.id_type == 1, IdMatch.is_used == 1)
    ).filter(
        DFundNv.is_used == 1, DFundNv.source_id == source_id, DFundNv.statistic_date > MIN_DATE, DFundNv.nav < 2000
    ).with_entities(
        IdMatch.matched_id, DFundNv.fund_id, DFundNv.statistic_date, DFundNv.source_id, DFundNv.nav, DFundNv.added_nav, DFundNv.adjusted_nav
    )
    if update_time is not None:
        stmt = stmt.filter(DFundNv.update_time >= update_time)
    if fund_id is not None:
        stmt = stmt.filter(DFundNv.fund_id == fund_id)
    inp = MysqlInput(session.bind, stmt)

    vm = transform.ValueMap({
        # DFundNv.added_nav.name: lambda x: None if x == 0 else x
        DFundNv.added_nav.name: (
            lambda added_nv, nv, adj_nv, sid: cal_added_by_nv_and_adjnv(added_nv, nv, adj_nv, sid),
            DFundNv.added_nav.name, DFundNv.nav.name, DFundNv.adjusted_nav.name, DFundNv.source_id.name
        )
    })

    km = transform.MapSelectKeys({
        IdMatch.matched_id.name: FundNvDataSource.fund_id.name,
        DFundNv.statistic_date.name: FundNvDataSource.statistic_date.name,
        DFundNv.source_id.name: FundNvDataSource.source_id.name,
        DFundNv.nav.name: FundNvDataSource.nav.name,
        DFundNv.added_nav.name: FundNvDataSource.added_nav.name,
        DFundNv.adjusted_nav.name: FundNvDataSource.adjusted_nav.name,
    })
    s = Stream(inp, (vm, km))
    return s
Exemple #9
0
    def stream_main(cls, fund_ids=None):
        c = cls.conflu1(fund_ids)

        fn = transform.FillnaByColumn({
            "foundation_date": "reg_time",
            "fund_name": "fund_full_name"
        })

        vm = transform.ValueMap({
            "fund_status":
            lambda x: "运行中" if type(x) is not str else x,
            "is_umbrella_fund":
            (lambda x: 1 if type(x) is str and ('伞型' or '伞形') in x else 0,
             "fund_full_name"),
            "limit_time":
            lambda x: "无限期" if type(x) is float and x > 900 else x,
            "currency":
            lambda x: "人民币" if type(x) is not str else x,
        })

        sk = transform.MapSelectKeys({
            "source_id": "source_id",
            "limit_date": "limit_date",
            "issuing_scale": "issuing_scale",
            "fund_name": "fund_name",
            "foundation_date": "foundation_date",
            "fund_full_name": "fund_full_name",
            "open_date": "open_date",
            "number_clients": "number_clients",
            "currency": "currency",
            "reg_code": "reg_code",
            "limit_time": "limit_time",
            "end_date": "end_date",
            "currency_type": "currency_type",
            "is_abnormal_liquidation": "is_abnormal_liquidation",
            "fund_id": "fund_id",
            "reg_time": "reg_time",
            "fund_status": "fund_status",
            "locked_time_limit": "locked_time_limit",
            "liquidation_cause": "liquidation_cause",
            "is_umbrella_fund": "is_umbrella_fund"
        })

        s = Stream(c, transform=([fn, vm, sk]))
        return s
Exemple #10
0
    def stream_010001(cls):
        """
            清洗 x_org_executive_info;

        """

        sql = "SELECT im.matched_id, `name`, qualifying_way FROM crawl_private.x_org_executive_info tb_main " \
              "JOIN (SELECT person_id, MAX(version) latest_ver FROM crawl_private.x_org_executive_info GROUP BY " \
              "person_id) tb_latest " \
              "ON tb_main.version = tb_latest.latest_ver AND tb_main.person_id = tb_latest.person_id " \
              "JOIN base.id_match im ON im.source_id = tb_main.person_id " \
              "AND im.id_type = 3 AND im.source = '010001' AND im.is_used = 1 "

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            "name": (lambda x: re.sub("(.*)", "", x), "name"),
            "qualifying_way":
            lambda x: {
                "通过考试": "通过考试",
                "资格认定": "资格认定"
            }.get(x) if type(x) is str else x,
            "person_name_py": (lambda x: "".join(
                [x[0] for x in py(x, style=py_style.FIRST_LETTER)]).upper(),
                               "name"),
        })

        vm2 = transform.ValueMap({
            "is_fund_qualification": (lambda x: int(bool(x)), "qualifying_way")
        })

        sk = transform.MapSelectKeys({
            "matched_id":
            "person_id",
            'qualifying_way':
            'fund_qualification_way',
            'name':
            'person_name',
            "person_name_py":
            "person_name_py",
            "is_fund_qualification":
            "is_fund_qualification"
        })
        s = Stream(inp, transform=[vm, vm2, sk])
        return s
Exemple #11
0
    def conflu(cls):
        s01, s11 = cls.stream_000001(), cls.stream_010001()

        p = {
            0: {
                "org_full_name": ("source_id", "010001"),
            },
            1: {
                "org_full_name": ("source_id", "000001"),
            },
        }
        c = Confluence(s01, s11, on=[OrgInfo.org_id.name], prio_l1=p)

        dk = transform.DropKeys(["source_id"])

        s = Stream(c, transform=[dk])

        return Confluence(s)
Exemple #12
0
    def fund_info_subsidiary(cls):
        """
            fund_info_subsidiary 映射;

        """

        sql = " \
               SELECT idm.matched_id, fis.risk_income_character,fis.investment_restriction, fis.investment_target, \
                fis.income_distribution FROM ( \
                SELECT DISTINCT matched_id FROM base.id_match WHERE id_type = 1 and is_used =1  \
                ) as idm \
                JOIN \
                (SELECT fund_id, investment_restriction, investment_target, risk_income_character, \
                income_distribution FROM base.fund_info_subsidiary) as fis \
                ON fis.fund_id = idm.matched_id"

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            'income_distribution':
            lambda x: cls.sub_wrong_to_none(x) if type(x) is str else x,
            'investment_restriction':
            lambda x: cls.sub_wrong_to_none(x) if type(x) is str else x,
            'investment_target':
            lambda x: cls.sub_wrong_to_none(x) if type(x) is str else x,
            'risk_income_character':
            lambda x: cls.sub_wrong_to_none(x) if type(x) is str else x,
        })

        sk = transform.MapSelectKeys({
            "matched_id":
            "fund_id",
            "income_distribution":
            "income_distribution",
            'investment_restriction':
            'investment_restriction',
            'investment_target':
            'investment_target',
            'risk_income_character':
            'risk_income_character'
        })

        s = Stream(inp, transform=[vm, sk])
        return s
Exemple #13
0
def stream_x_fund_info_010005(fund_ids=None):
    sql = "\
    SELECT TB_MAIN.version, im.matched_id, TB_MAIN.fund_id, fund_name_amac, reg_code_amac, foundation_date_amac, issuing_scale_amac, number_clients_amac \
    FROM x_fund_info_futures TB_MAIN \
    JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_futures WHERE is_used = 1 GROUP BY fund_id) as TB_LATEST \
    ON TB_MAIN.version = TB_LATEST.latest_ver AND TB_MAIN.fund_id = TB_LATEST.fund_id \
    JOIN base.id_match im ON im.source_id = TB_MAIN.fund_id AND id_type = 1 AND source = '010005' AND im.is_used = 1 "

    if fund_ids is not None:
        fids = SQL.values4sql(fund_ids)
        sql += "WHERE im.matched_id IN {fids}".format(fids=fids)

    inp = MysqlNativeInput(engine_c, sql)

    vm = transform.ValueMap(
        OrderedDict([
            ("fund_name_amac", lambda x: x.strip()),
            ("reg_code_amac", lambda x: x.strip()),
            ("number_clients_amac", lambda x: int(x)),
        ]))

    cln = transform.CleanWrongToNone({"reg_code_amac": ""}, repls=None)

    ac = transform.AddConst({"source_id": "010005"})

    # jn = transform.Join(inp2, left_on="fund_id", right_on="source_id")

    dd = transform.SortDropduplicate(sort_by=["version", "matched_id"],
                                     ascending=[False, True],
                                     subset="matched_id",
                                     keep="first")

    km = transform.MapSelectKeys({
        "matched_id": "fund_id",
        "source_id": None,
        "fund_name_amac": "fund_full_name",
        "reg_code_amac": "reg_code",
        "foundation_date_amac": "foundation_date",
        "issuing_scale_amac": "issuing_scale",
        "number_clients_amac": "number_clients",
    })

    stream = Stream(inp, (vm, cln, ac, dd, km), name="x_fund_info_future")
    return stream
Exemple #14
0
    def y_org_description(cls):
        """
            清洗 y_org_description;

        """

        sql = "SELECT org_id,profile,team,investment_idea,prize,major_shareholder,shareholder_structure FROM crawl_private.`y_org_description`"

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            "investment_idea":
            lambda x: sub_wrong_to_none(x),
            "profile":
            lambda x: sub_wrong_to_none(x),
            "team":
            lambda x: sub_wrong_to_none(x),
            "prize":
            lambda x: sub_wrong_to_none(x),
            "major_shareholder":
            lambda x: sub_wrong_to_none(x),
            "shareholder_structure":
            lambda x: sub_wrong_to_none(x),
        })

        sk = transform.MapSelectKeys({
            'org_id':
            'org_id',
            'investment_idea':
            'investment_idea',
            'profile':
            'profile',
            'team':
            'team',
            'prize':
            'prize',
            'major_shareholder':
            'major_shareholder',
            'shareholder_structure':
            'shareholder_structure'
        })

        s = Stream(inp, transform=[vm, sk])
        return s
Exemple #15
0
def stream_010001():
    session = dbsession(bind=engine_r)
    stmt = session.query(DBondInfo).join(
        BondInfo, DBondInfo.bond_id == BondInfo.bond_id).filter(
            and_(DBondInfo.source_id == "010001",
                 DBondInfo.is_used == 1)).with_entities(
                     DBondInfo.bond_id, DBondInfo.source_id,
                     DBondInfo.bond_full_name, DBondInfo.interest_type)
    inp = MysqlInput(session.bind, stmt)

    def clean_interest_type(type_, fname):
        if fname.find("贴现") >= 0:
            return "零息"

        else:
            d_type = {
                "零息": "零息",
                "固定利息": "固定利率",
                "浮动利息": "浮动利率",
            }

            res = d_type.get(type_)
            if res is None:
                if fname.find("短期融资券") >= 0:
                    return "固定利率"
            return res

    vm = transform.ValueMap({
        BondInfo.interest_type.name:
        (lambda type_, fname: clean_interest_type(type_, fname),
         DBondInfo.interest_type.name, DBondInfo.bond_full_name.name)
    })

    dn = transform.Dropna(subset=[BondInfo.interest_type.name])

    km = transform.MapSelectKeys({
        DBondInfo.bond_id.name: BondInfo.bond_id.name,
        BondInfo.interest_type.name: None
    })

    s = Stream(inp, (vm, dn, km))

    return s
Exemple #16
0
    def stream_020003(cls):
        """
            清洗公募数米 fund_info;

        """

        sql = "select * FROM (SELECT fund_id,fund_name,fund_full_name,data_source, \
                    foundation_date,fund_status,	purchase_status,redemption_status, \
                    aip_status,	recommendation_start,recommendation_end,init_raise \
                    FROM crawl_public.d_fund_info  \
                    WHERE data_source = '020003' ORDER BY version DESC ) AS T \
                    GROUP  BY T.fund_id"

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            "fund_status":
            lambda x: sub_wrong_to_none(x),
            "purchase_status":
            lambda x: cls.PURCHASE_STATUS_020003.get(x),
            "init_raise":
            lambda x: sub_wrong_to_none(x)
        })

        vm2 = transform.ValueMap(
            {"init_raise": lambda x: float(re.sub("亿", "", x))})

        sk = transform.MapSelectKeys({
            "data_source": "source_id",
            'fund_id': 'fund_id',
            'fund_name': 'fund_name',
            'fund_full_name': 'fund_full_name',
            'foundation_date': 'foundation_date',
            'fund_status': 'fund_status',
            'purchase_status': 'purchase_status',
            'redemption_status': 'redemption_status',
            'aip_status': 'aip_status',
            'recommendation_start': 'recommendation_start',
            'recommendation_end': 'recommendation_end',
            'init_raise': 'init_raise',
        })
        s = Stream(inp, transform=[vm, vm2, sk])
        return s
Exemple #17
0
    def stream_manager_010005(cls, d_xoi=None):
        """
            清洗010005源(期货公司)发行产品的基金管理人配对信息;

        Args:
            d_xoi:

        Returns:

        """

        sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \
                     "FROM x_fund_info_futures xfi " \
                     "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_futures GROUP BY fund_id) tb_latest " \
                     "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \
                     "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010005' AND im.is_used = 1 " \
                     "LEFT JOIN base.fund_info fi " \
                     "ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010005' " \
                     "WHERE fund_name_amac NOT LIKE '%%信托计划'"

        inp = MysqlInput(ENGINE_C, sql_maxver)

        inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info")

        if d_xoi is None:
            d_xoi = cls._d_oi_manager()

        vm = transform.ValueMap(OrderedDict([
            ("fund_issue_org_amac", lambda x: x.strip()),
            ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")),
            (FundOrgMapping.org_type.name, "基金管理人"),  # 该来源的fund_issue_org_amac都是基金管理人;
            (FundOrgMapping.org_type_code.name, 2),
            (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status"))
        ]))

        jn = transform.Join(inp2, how="left", on="org_id")

        sk = transform.MapSelectKeys(cls.FIELDS)
        dn = transform.Dropna(
            subset=[FundOrgMapping.fund_id.name, FundOrgMapping.org_id.name, FundOrgMapping.org_type_code.name])

        s = Stream(inp, transform=(vm, jn, sk, dn,))
        return s
Exemple #18
0
    def end_date(cls):
        """
            清洗 fund_manager_mapping;

        """

        sql = "SELECT idm.matched_id, fmg.end_date FROM (SELECT DISTINCT matched_id FROM base.id_match) as idm \
              JOIN base.fund_manager_mapping AS fmg ON idm.matched_id = fmg.fund_id \
              WHERE end_date> '{time}' GROUP BY fund_id".format(
            time=cls.datetime)

        inp = MysqlInput(ENGINE_RD, sql)

        sk = transform.MapSelectKeys({
            "end_date": "end_date",
            'matched_id': 'fund_id'
        })
        s = Stream(inp, transform=[sk])
        return s
Exemple #19
0
    def stream_020001(cls):
        """
            清洗d_fund_se(020001)

        """

        sql = "SELECT * FROM  \
                (SELECT * from ( \
                SELECT fund_id,fund_name,source_id,stock_id,stock_name,statistic_date,stock_ratio,stock_sum, \
                stock_change,variable_rate,change_num from crawl_private.d_fund_security \
                where source_id = '020001' ORDER BY version DESC) AS T \
                GROUP BY T.fund_id,T.stock_id,T.statistic_date) as dfs \
                LEFT JOIN \
                (SELECT matched_id,source_id as source_id2 from base.id_match where id_type=1 and source = '020001') as i \
                ON dfs.fund_id = i.source_id2 \
                WHERE i.matched_id is not NULL"

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            'security_category': '股票',
            'source': '第三方',
            'source_id': '020001'
        })

        sk = transform.MapSelectKeys({
            'statistic_date': 'statistic_date',
            'matched_id': 'fund_id',
            'fund_name': 'fund_name',
            'source_id': 'source_id',
            'security_category': 'security_category',
            'stock_id': 'id',
            'stock_name': 'name',
            'stock_sum': 'sum',
            'stock_ratio': 'ratio',
            'value': 'value',
            'stock_change': 'change',
            'variable_rate': 'change_rate',
            'source': 'source'
        })
        s = Stream(inp, transform=[vm, sk])
        return s
Exemple #20
0
    def stream_010101(cls):
        sql = "SELECT im.matched_id as person_id, pi.person_name, xpc.status, xpc.org_name as org_name_ " \
              "FROM crawl_private.x_person_certificate_010101 xpc " \
              "JOIN base.id_match im ON xpc.person_id = im.source_id " \
              "LEFT JOIN base.person_info pi ON im.matched_id = pi.person_id " \
              "WHERE im.source = '010101' AND im.id_type = 3 AND im.is_used = 1 " \
              "AND xpc.org_name not like '空'"

        def org_name_dict():
            import pandas as pd
            sql_oi = "SELECT org_full_name, org_id, org_name FROM base.org_info"
            res = pd.read_sql(sql_oi, ENGINE)
            v = tuple([(oid, oname) for oid, oname in zip(res["org_id"], res["org_name"])])
            res = dict(zip(res["org_full_name"], v))
            return res

        inp = MysqlInput(ENGINE, sql)

        d = org_name_dict()

        vm = transform.ValueMap({
            "org_id": (lambda x: d.get(x)[0], "org_name_"),
            "org_name": (lambda x: d.get(x)[1], "org_name_"),
            "is_current": (lambda x: {"正常": "1", "离职": "0"}.get(x), "status"),
            "duty": None
        })

        sk = transform.MapSelectKeys({
            "person_id": "person_id",
            "person_name": "person_name",
            "org_id": "org_id",
            "org_name": "org_name",
            "is_current": "is_current",
            "duty": "duty"
        })

        dn = transform.Dropna(
            subset=["person_id", "org_id"]
        )

        s = Stream(inp, [vm, sk, dn])
        return s
Exemple #21
0
    def stream_020001(cls):
        """
            清洗 d_org_shareholder;

        """

        sql = " \
                SELECT idm.matched_id, doi.data_source, doi.statistic_date, doi.shareholder_name, \
                doi.shareholder_num, doi.capital_stock, doi.stock_held, doi.proportion_held \
                FROM (SELECT matched_id,source_id from base_public.id_match \
                WHERE data_source = '020001' AND is_used = 1 and id_type = 2) as idm \
                JOIN crawl_public.d_org_shareholder AS doi \
                ON doi.org_id = idm.source_id"

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            "capital_stock": lambda x: cls.sub_wrong_to_none(x) if type(x) is str else x,
            "stock_held": lambda x: cls.sub_wrong_to_none(x) if type(x) is str else x,
            "proportion_held": lambda x: cls.sub_wrong_to_none(x) if type(x) is str else x,
        })

        vm2 = transform.ValueMap({
            "capital_stock": lambda x: float(x) if type(x) is str else x,
            "stock_held": lambda x: float(x) if type(x) is str else x,
            "proportion_held": lambda x: round(float(x)/100, 6) if type(x) is str else x,


        })

        sk = transform.MapSelectKeys({
            "matched_id": "org_id",
            'data_source': 'data_source',
            'statistic_date': 'statistic_date',
            'capital_stock': 'capital_stock',
            'stock_held': 'stock_held',
            'proportion_held': 'proportion_held',
            'shareholder_num': 'shareholder_num',
            'shareholder_name': 'shareholder_name'
        })
        s = Stream(inp, transform=[vm, vm2, sk])
        return s
Exemple #22
0
    def stream_manager(cls):
        """
            清洗 fund_manager_mapping;

        """

        sql = 'SELECT fi.fund_id, GROUP_CONCAT(fmm.person_name SEPARATOR ",") as person_name \
                FROM base_public.fund_info fi \
                JOIN base_public.fund_manager_mapping fmm ON fi.fund_id = fmm.fund_id \
                WHERE fmm.is_current = 1 \
                GROUP BY fund_id'

        inp = MysqlInput(ENGINE_RD, sql)

        sk = transform.MapSelectKeys({
            "person_name": "fund_manager",
            'fund_id': 'fund_id'
        })
        s = Stream(inp, transform=[sk])
        return s
Exemple #23
0
    def stream_manager(cls):
        """
            清洗 fund_manager_mapping;

        """

        sql = 'SELECT fi.matched_id, GROUP_CONCAT(fmm.person_name SEPARATOR ",") as person_name  \
                FROM (SELECT DISTINCT matched_id FROM base.id_match WHERE id_type = 1 AND is_used = 1) fi  \
                JOIN base.fund_manager_mapping fmm ON fi.matched_id = fmm.fund_id  \
                WHERE fmm.is_current is NULL  or fmm.is_current > 0 \
                GROUP BY fund_id '

        inp = MysqlInput(ENGINE_RD, sql)

        sk = transform.MapSelectKeys({
            "person_name": "fund_member",
            'matched_id': 'fund_id'
        })
        s = Stream(inp, transform=[sk])
        return s
Exemple #24
0
    def stream_d_fund_info_020003(cls, fund_ids=None):
        sql = "\
        SELECT TB_MAIN.version, im.matched_id, TB_MAIN.fund_id, TB_MAIN.source_id, fund_name, fund_full_name, fund_status, locked_time_limit, \
        open_date, foundation_date \
        FROM d_fund_info TB_MAIN \
        JOIN (SELECT fund_id, MAX(version) latest_ver, source_id FROM d_fund_info WHERE source_id = '020003' AND is_used = 1 GROUP BY fund_id, source_id) AS TB_LATEST \
        ON TB_MAIN.version = TB_LATEST.latest_ver AND TB_MAIN.fund_id = TB_LATEST.fund_id AND TB_MAIN.source_id = TB_LATEST.source_id \
        JOIN base.id_match im ON im.source_id = TB_MAIN.fund_id AND id_type = 1 AND source = '020003' AND im.is_used = 1 "

        if fund_ids is not None:
            fids = SQL.values4sql(fund_ids)
            sql += "WHERE im.matched_id IN {fids}".format(fids=fids)

        inp = MysqlNativeInput(engine_c, sql)

        vm = transform.ValueMap({
            "fund_status": {
                "正在运行": "运行中",
                "已清盘": "终止",
                "封闭运行": "运行中"
            },
            "open_date":
            lambda x: sub_wrong_to_none(x) if type(x) is str else x
        })

        dd = transform.SortDropduplicate(sort_by=["version", "matched_id"],
                                         ascending=[False, True],
                                         subset="matched_id",
                                         keep="first")

        km = transform.MapSelectKeys({
            "matched_id": "fund_id",
            "source_id": "source_id",
            "fund_status": None,
            "open_date": None,
            "foundation_date": None,
            "fund_name": "fund_name"
        })

        stream = Stream(inp, transform=[vm, dd, km])
        return stream
Exemple #25
0
def stream_x_fund_info_security():
    def clean_date(x):

        try:
            return dt.datetime.strptime(x, "%Y-%m-%d").date()

        except:
            return None

    inp = MysqlInput(engine_c, "\
    SELECT TB_MAIN.version, TB_MAIN.fund_id, fund_name_amac, reg_code_amac, foundation_date_amac, fund_time_limit_amac \
    FROM x_fund_info_securities TB_MAIN \
    JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_securities WHERE is_used = 1 GROUP BY fund_id) as TB_LATEST \
    ON TB_MAIN.version = TB_LATEST.latest_ver AND TB_MAIN.fund_id = TB_LATEST.fund_id")

    vm = transform.ValueMap(
        OrderedDict(
            [
                ("fund_name_amac", lambda x: remove_blank(x)),
                ("reg_code_amac", lambda x: remove_blank(x)),
                ("limit_date", (lambda x: clean_date(x), "fund_time_limit_amac")),
                ("fund_time_limit_amac", lambda x: x if x == "无期限" else None)
            ]
        )
    )

    km = transform.MapSelectKeys(
        {
            # "version": None,
            "fund_id": "source_id",
            "fund_name_amac": "fund_full_name",
            "reg_code_amac": "reg_code",
            "foundation_date_amac": "foundation_date",
            "fund_time_limit_amac": "limit_time",
            "limit_date": "limit_date",
            "fund_status_amac": "fund_status",
        }
    )

    stream = Stream(inp.frame, (vm, km))
    return stream
Exemple #26
0
    def stream_is_reg(cls):

        sql = "SELECT idm.matched_id, idd.matched_id AS is_reg FROM \
                (SELECT DISTINCT matched_id FROM base.id_match WHERE id_type = 1 AND is_used = 1) AS idm \
                LEFT JOIN \
                (SELECT DISTINCT matched_id FROM base.id_match WHERE  \
                source in  ('010002','010003','010004','010005') and is_used =1 and id_type=1 ) AS idd \
                ON idm.matched_id = idd.matched_id"

        inp = MysqlNativeInput(ENGINE_RD, sql)

        vm = transform.ValueMap(
            {"is_reg": lambda x: 1 if type(x) is str else 0})

        km = transform.MapSelectKeys({
            "matched_id": "fund_id",
            "is_reg": "is_reg"
        })

        stream = Stream(inp, transform=[vm, km])
        return stream
Exemple #27
0
    def stream_y(cls):
        sql = "SELECT org_id,org_name,person_id,person_name,duty,duty_detail," \
              "tenure_date,is_current FROM crawl_private.y_org_person_mapping"

        inp = MysqlInput(ENGINE, sql)

        sk = transform.MapSelectKeys(
            {
                "person_id": "person_id",
                "person_name": "person_name",
                "org_id": "org_id",
                "org_name": "org_name",
                "duty": "duty",
                "duty_detail": "duty_detail",
                "is_current": "is_current",
                'tenure_date': "tenure_date"
            }
        )

        s = Stream(inp, transform=[sk])
        return s
Exemple #28
0
    def stream_custodian(cls):
        """
            清洗 fund_custodian;

        """

        sql = " \
                SELECT idm.matched_id, fom.org_name FROM (SELECT DISTINCT matched_id FROM base_public.id_match \
                WHERE id_type = 1 AND is_used = 1) AS idm \
                JOIN base_public.fund_org_mapping \
                AS fom ON idm.matched_id = fom.fund_id WHERE fom.type_code = 2"

        inp = MysqlInput(ENGINE_RD, sql)

        sk = transform.MapSelectKeys({
            "org_name": "fund_custodian",
            'matched_id': 'fund_id',

        })
        s = Stream(inp, transform=[sk])
        return s
Exemple #29
0
    def stream_type(cls):
        """
            清洗 fund_type;

        """

        sql = " \
                SELECT * FROM (SELECT DISTINCT matched_id FROM base_public.id_match WHERE id_type = 1 AND is_used = 1) AS idm \
                JOIN (SELECT fund_id,type_name FROM base_public.fund_type_mapping WHERE typestandard_code = '02') \
                AS ftm ON idm.matched_id = ftm.fund_id \
"

        inp = MysqlInput(ENGINE_RD, sql)

        sk = transform.MapSelectKeys({
            "type_name": "fund_type",
            'fund_id': 'fund_id',

        })
        s = Stream(inp, transform=[sk])
        return s
Exemple #30
0
    def stream_custodian(cls):

        sql = "select ff.fund_id,GROUP_CONCAT(o.org_name SEPARATOR ',') as org_name from   \
                ((SELECT DISTINCT matched_id as fund_id from base.id_match where id_type=1 and is_used=1) as ff \
                JOIN   \
                (SELECT fund_id,org_id from base.fund_org_mapping WHERE org_type_code = 5) as fom \
                ON ff.fund_id = fom.fund_id    \
                LEFT JOIN \
                (SELECT org_id,org_name from base.org_info) as o \
                on fom.org_id = o.org_id) \
                GROUP BY ff.fund_id"

        inp = MysqlNativeInput(ENGINE_RD, sql)

        km = transform.MapSelectKeys({
            "fund_id": "fund_id",
            "org_name": "fund_custodian"
        })

        stream = Stream(inp, transform=[km])
        return stream