Ejemplos de Stream.Stream en Python, ejemplos de utils.etlkit.core.base.Stream.Stream en Python

Ejemplo n.º 1

0

Mostrar archivo

    def stream_020003_resume(cls):
        """Build the stream that fills ``investment_years`` from source 020003 resumes.

        The query joins ``base.id_match`` (``id_type = 3``, ``is_used = 1``,
        ``source = '020003'``) to the rows of
        ``crawl_private.d_person_description`` carrying the maximum ``version``
        per ``person_id``.  String ``resume`` values are passed through
        ``cls.clean_01`` and stored as ``investment_years``; ``matched_id`` is
        renamed to ``person_id``; finally ``Dropna(how='any', axis=0)`` runs.

        :return: the configured ``Stream``.
        """
        sql = "select idm.matched_id, dd.resume FROM \
                (SELECT matched_id,source,source_id FROM base.id_match \
                where id_type = 3 and is_used = 1 AND source = '020003') as idm \
                JOIN \
                (SELECT * FROM \
                (select MAX(version) as mm,person_id as pi,source_id as si \
                from crawl_private.d_person_description GROUP BY person_id) ma \
                JOIN crawl_private.d_person_description as dpd \
                ON dpd.person_id = ma.pi and ma.mm = dpd.version) dd \
                ON dd.pi = idm.source_id \
                AND dd.si = idm.source"

        source = MysqlInput(ENGINE_RD, sql)

        def _years_from_resume(x):
            # Only exact ``str`` values are cleaned; anything else passes through.
            if type(x) is str:
                return cls.clean_01(x)
            return x

        steps = [
            transform.ValueMap({
                "investment_years": (_years_from_resume, 'resume'),
            }),
            transform.MapSelectKeys({
                "matched_id": "person_id",
                'investment_years': 'investment_years',
            }),
            transform.Dropna(axis=0, how='any'),
        ]
        return Stream(source, transform=steps)

Ejemplo n.º 2

0

Mostrar archivo

    def conflu_master_strategy(cls):
        """Build the confluence that supplies ``master_strategy`` for each org.

        Strategy names come from ``fund_type_mapping`` (``typestandard_code = 601``,
        ``stype_code <> 6010901``, ``flag = 1``) via ``base.fund_org_mapping``
        rows with ``org_type_code = 1``.  Duplicated ``org_id`` rows are removed
        with ``DropDuplicate`` and the columns are renamed to the ``OrgInfo``
        column names.

        :return: a ``Confluence`` wrapping the single configured ``Stream``.
        """
        # The outer query LEFT JOINs from the org table so that every primary key
        # is emitted (and therefore updated) even when the source data changes.
        sql = "SELECT t1.org_id, t2.stype_name FROM {tb_test} t1 " \
              "LEFT JOIN (SELECT oi.org_id, ftm.stype_name " \
              "FROM {tb_test} oi " \
              "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id " \
              "JOIN (SELECT fund_id, stype_code, stype_name FROM fund_type_mapping " \
              "WHERE typestandard_code = 601 AND stype_code <> 6010901 AND flag = 1) ftm ON fom.fund_id = ftm.fund_id " \
              "WHERE fom.org_type_code = 1 " \
              "GROUP BY oi.org_id, ftm.stype_code " \
              "ORDER BY org_id ASC, COUNT(fom.fund_id) DESC) t2 ON t1.org_id = t2.org_id ".format(tb_test=TEST_TABLE)

        source = MysqlInput(ENGINE_RD, sql)

        # The subquery orders strategies per org by fund count (descending);
        # presumably DropDuplicate keeps the first row per org_id -- TODO confirm.
        steps = [
            transform.DropDuplicate(subset=["org_id"]),
            transform.MapSelectKeys({
                "org_id": OrgInfo.org_id.name,
                "stype_name": OrgInfo.master_strategy.name,
            }),
        ]

        return Confluence(Stream(source, steps))

Ejemplo n.º 3

0

Mostrar archivo

    def stream_010001(cls):
        """Build the stream that cleans ``x_org_info`` rows of source 010001.

        Selects ``org_id``, ``legal_person_resume`` and ``special_tips`` from
        ``crawl_private.x_org_info`` for organisations present in
        ``base.id_match`` (``source = '010001'``, ``id_type = 2``,
        ``is_used = 1``), grouped by ``org_id``.  Both text columns are passed
        through ``sub_wrong_to_none``.

        :return: the configured ``Stream``.
        """
        sql = "SELECT org_id,legal_person_resume,special_tips FROM ( \
                SELECT * FROM (SELECT matched_id,source_id FROM base.id_match WHERE  \
                source='010001' and id_type=2 and is_used=1) \
                as b LEFT JOIN \
                (SELECT org_id,legal_person_resume,special_tips FROM \
                crawl_private.`x_org_info`  \
                ORDER BY version DESC) as p ON b.source_id=p.org_id \
                WHERE p.org_id is not NULL) AS T \
                GROUP BY T.org_id"

        source = MysqlInput(ENGINE_RD, sql)

        # Same cleaner for both free-text columns.
        cleaned_columns = ("legal_person_resume", "special_tips")
        cleaner = transform.ValueMap({
            column: (lambda x: sub_wrong_to_none(x))
            for column in cleaned_columns
        })

        # Identity mapping: the selected columns keep their names.
        kept_columns = ('org_id', "special_tips", "legal_person_resume")
        selector = transform.MapSelectKeys({name: name for name in kept_columns})

        return Stream(source, transform=[cleaner, selector])

Ejemplo n.º 4

0

Mostrar archivo

Archivo: fund_info_DEP.py Proyecto: dxcv/fund

def stream_y_fund_info(fund_ids=None):
    """Build the stream of maximum-version ``y_fund_info`` rows, tagged as source 000001.

    :param fund_ids: optional fund ids; when not ``None`` they are rendered with
        ``SQL.values4sql`` and appended to the query as an ``IN`` restriction.
    :return: the configured ``Stream``.
    """
    sql = "\
    SELECT TB_MAIN.version, TB_MAIN.fund_id, fund_name, fund_full_name, fund_status, liquidation_cause, end_date \
    FROM y_fund_info TB_MAIN \
    JOIN (SELECT fund_id, MAX(version) as latest_ver FROM y_fund_info WHERE is_used = 1 GROUP BY fund_id) as TB_LATEST \
    ON TB_MAIN.version = TB_LATEST.latest_ver AND TB_MAIN.fund_id = TB_LATEST.fund_id "

    if fund_ids is not None:
        # The base query ends with a space, so the clause can be appended directly.
        sql = sql + "WHERE TB_MAIN.fund_id IN {fids}".format(fids=SQL.values4sql(fund_ids))

    source = MysqlNativeInput(engine_c, sql)

    add_source = transform.AddConst({"source_id": "000001"})

    # Sort by version (descending) then fund_id, and de-duplicate on fund_id.
    newest_per_fund = transform.SortDropduplicate(
        sort_by=["version", "fund_id"],
        ascending=[False, True],
        subset=["fund_id"],
    )

    # NOTE(review): "fund_name_en" is listed here but is not selected by the SQL
    # above -- confirm MapSelectKeys tolerates a key that is absent from the input.
    output_columns = (
        "fund_id",
        "fund_name",
        "fund_full_name",
        "fund_name_en",
        "source_id",
        "fund_status",
        "liquidation_cause",
        "end_date",
    )
    select_columns = transform.MapSelectKeys(dict.fromkeys(output_columns))

    return Stream(source, (add_source, newest_per_fund, select_columns))

Ejemplo n.º 5

0

Mostrar archivo

    def conflu_is_reg_now(cls):
        """Build the confluence that supplies the ``is_reg_now`` flag for each org.

        Orgs of category '私募基金管理公司' are RIGHT JOINed against
        ``crawl_private.x_org_info`` rows whose ``version`` is at least the
        timestamp of eight days ago (formatted ``%Y%m%d%H``).  An org with no
        such row has ``org_id_`` set to NULL and is flagged "否"; otherwise "是".

        :return: a ``Confluence`` wrapping the single configured ``Stream``.
        """
        # Version cut-off: now minus 8 days, rendered as YYYYMMDDHH.
        min_version = (dt.datetime.now() - dt.timedelta(8)).strftime("%Y%m%d%H")

        sql = "SELECT xoi.org_id as org_id_, oi.org_id " \
              "FROM crawl_private.x_org_info xoi " \
              "JOIN (SELECT DISTINCT org_id, version FROM crawl_private.x_org_info WHERE version >= {ver} ) tmp " \
              "ON xoi.org_id = tmp.org_id AND xoi.version = tmp.version " \
              "JOIN (SELECT matched_id, source_id FROM base.id_match WHERE id_type = 2 AND source = '010001' AND is_used = 1) im " \
              "ON xoi.org_id = im.source_id " \
              "RIGHT JOIN (SELECT org_id FROM {tb_test} WHERE org_category = '私募基金管理公司') oi " \
              "ON im.matched_id = oi.org_id ".format(tb_test=TEST_TABLE, ver=min_version)

        source = MysqlInput(ENGINE_RD, sql)

        def _flag(x):
            # A missing crawl-side org_id means no recent registration row matched.
            if x is None:
                return "否"
            return "是"

        flag_column = OrgInfo.is_reg_now.name
        steps = [
            transform.ValueMap({flag_column: (_flag, "org_id_")}),
            transform.MapSelectKeys({
                "org_id": OrgInfo.org_id.name,
                flag_column: None,
            }),
        ]

        return Confluence(Stream(source, steps))

Ejemplo n.º 6

0

Mostrar archivo

    def stream_020002(cls):
        """Build the stream that cleans ``d_org_asset_scale`` rows of source 020002.

        Joins ``crawl_public.d_org_asset_scale`` to ``base_public.id_match``
        (``id_type = 2``, ``is_used = 1``, both sides ``data_source = '020002'``),
        passes string ``total_asset`` values through ``cls.sub_wrong_to_none``
        and renames ``matched_id`` to ``org_id``.

        :return: the configured ``Stream``.
        """
        sql = " \
                select idm.matched_id, doa.org_name, doa.data_source, doa.statistic_date, doa.total_asset, doa.funds_num \
                FROM crawl_public.d_org_asset_scale as doa \
                JOIN base_public.id_match as idm \
                ON doa.org_id = idm.source_id \
                WHERE idm.id_type = 2 AND idm.is_used = 1 AND doa.data_source = '020002' \
                AND idm.data_source = '020002'"

        source = MysqlInput(ENGINE_RD, sql)

        def _clean_asset(x):
            # Only exact ``str`` values are cleaned; other values pass through.
            if type(x) is str:
                return cls.sub_wrong_to_none(x)
            return x

        cleaner = transform.ValueMap({"total_asset": _clean_asset})

        # org_name is fetched by the query but not kept in the output.
        selector = transform.MapSelectKeys({
            "matched_id": "org_id",
            'data_source': 'data_source',
            'statistic_date': 'statistic_date',
            'total_asset': 'total_asset',
            'funds_num': 'funds_num',
        })

        return Stream(source, transform=[cleaner, selector])

Ejemplo n.º 7

0

Mostrar archivo

    def stream(cls):
        """Build the stream that flags core members in ``base.person_info``.

        Selects the ``person_id`` of every person whose
        ``base.org_person_mapping.duty_detail`` is one of the duties listed in
        the query, and sets ``is_core_member`` to the constant "是" for them.

        :return: the configured ``Stream``.
        """
        sql = "SELECT idm.person_id FROM( \
                SELECT person_id FROM base.person_info) as idm \
                JOIN \
                base.org_person_mapping as op \
                on op.person_id = idm.person_id \
                WHERE  op.duty_detail IN ( \
                  '基金经理', \
                  '投资总监', \
                  '投资经理', \
                  '投研总监', \
                  '风控总监', \
                  '投资部经理', \
                  '投资决策委员会主席', \
                  '总经理', \
                  '投资部主管' \
                 )GROUP BY idm.person_id"

        source = MysqlInput(ENGINE_RD, sql)

        # Every selected person receives the same constant flag value.
        flagger = transform.ValueMap({"is_core_member": "是"})

        selector = transform.MapSelectKeys({
            "person_id": "person_id",
            'is_core_member': 'is_core_member',
        })

        return Stream(source, transform=[flagger, selector])

Ejemplo n.º 8

0

Mostrar archivo

Archivo: recommendation_start.py Proyecto: dxcv/fund

    def stream_recommendation_end(cls):
        """Build the stream of fund recommendation start/end dates.

        Reads ``recommendation_end`` and ``recommendation_start`` from
        ``crawl_public.d_fund_info`` (rows where either is '0000-00-00' are
        excluded), grouped by ``fund_id`` and joined to the distinct
        ``matched_id`` values of ``base_public.id_match`` (``id_type = 1``,
        ``is_used = 1``).  ``matched_id`` is renamed to ``fund_id``.

        :return: the configured ``Stream``.
        """
        sql = " \
                SELECT idm.matched_id, dff.recommendation_end, dff.recommendation_start FROM \
                (SELECT DISTINCT matched_id from base_public.id_match WHERE id_type = 1 AND is_used =1) AS idm \
                JOIN \
                (SELECT * FROM ( \
                SELECT fund_id, recommendation_end, recommendation_start FROM crawl_public.d_fund_info  \
                WHERE recommendation_end <> '0000-00-00' and recommendation_start <> '0000-00-00') AS dfi  \
                GROUP BY dfi.fund_id) as dff \
                ON idm.matched_id = dff.fund_id"

        source = MysqlInput(ENGINE_RD, sql)

        selector = transform.MapSelectKeys({
            "recommendation_end": "recommendation_end",
            "recommendation_start": "recommendation_start",
            'matched_id': 'fund_id',
        })

        return Stream(source, transform=[selector])

Ejemplo n.º 9

0

Mostrar archivo

Archivo: type_csrc.py Proyecto: dxcv/fund

    def stream_000001(cls):
        """Build the stream that cleans the CSRC classification of ``stock_info_010001``.

        Reads ``stock_id`` and ``type_csrc`` from
        ``crawl_finance.stock_info_010001``.  ``type_csrc`` is reduced to its
        second comma-separated field with all whitespace removed; that value is
        then looked up in ``cls.TYPE_CODE``, ``cls.TYPE_NAME`` and
        ``cls.STYPE_CODE`` to derive ``category_code``, ``category_name`` and
        ``type_code``.  ``stock_id`` is passed through ``cls.add_stock_suffix``
        and suffixed with ".SH".  The cleaned ``type_csrc`` is emitted as
        ``type_name`` and ``Dropna(how='any', axis=0)`` is applied last.

        :return: the configured ``Stream``.
        """

        sql = "SELECT stock_id,type_csrc FROM crawl_finance.stock_info_010001"

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            # Raw string: "\s" in a plain literal is an invalid escape sequence
            # (DeprecationWarning, SyntaxWarning on newer Pythons).
            "type_csrc":
            lambda x: re.sub(r"\s", "",
                             x.split(",")[1]),
            # The lookups below read "type_csrc"; presumably they see the value
            # cleaned above -- depends on ValueMap's evaluation order, TODO confirm.
            "category_code": (lambda x: cls.TYPE_CODE.get(x), "type_csrc"),
            "category_name": (lambda x: cls.TYPE_NAME.get(x), "type_csrc"),
            "type_code": (lambda x: cls.STYPE_CODE.get(x), "type_csrc"),
            "stock_id":
            lambda x: str(cls.add_stock_suffix(x)) + ".SH"
        })

        sk = transform.MapSelectKeys({
            "stock_id": "stock_id",
            "category_name": "category_name",
            "category_code": "category_code",
            "type_code": "type_code",
            "type_csrc": "type_name"
        })

        dn = transform.Dropna(subset=None, axis=0, how='any')

        s = Stream(inp, transform=[vm, sk, dn])
        return s

Ejemplo n.º 10

0

Mostrar archivo

    def completion_020001(cls):
        """Build the stream of source-020001 fund-holder rows for known funds.

        Reads ``crawl_public.d_fund_holder`` rows with ``data_source = '020001'``
        whose ``fund_id`` already appears in ``base_public.fund_holder``.
        ``share_held`` goes through ``sub_wrong_to_none`` and, when the result is
        a ``float``, is rounded to 6 decimals; ``holder_type`` is translated via
        ``cls.hold_type_dict``.

        :return: the configured ``Stream``.
        """
        sql = "SELECT fund_id, statistic_date, holder_type, share_held \
                FROM crawl_public.d_fund_holder  \
                WHERE data_source = '020001' \
                AND fund_id IN (SELECT DISTINCT fund_id FROM base_public.fund_holder)"

        source = MysqlInput(ENGINE_RD, sql)

        def _round_share(x):
            # Only exact ``float`` values are rounded; anything else is kept as is.
            if type(x) is float:
                return round(x, 6)
            return x

        # First pass: clean and translate.  Second pass: round the cleaned value.
        clean_pass = transform.ValueMap({
            "share_held": lambda x: sub_wrong_to_none(x),
            "holder_type": lambda x: cls.hold_type_dict.get(x),
        })
        round_pass = transform.ValueMap({"share_held": _round_share})

        kept = ("fund_id", "share_held", "holder_type", "statistic_date")
        selector = transform.MapSelectKeys({name: name for name in kept})

        return Stream(source, transform=[clean_pass, round_pass, selector])

Ejemplo n.º 11

0

Mostrar archivo

    def stream_020001(cls):
        """Build the stream of source-020001 holder types for matched funds.

        Reads ``crawl_public.d_fund_holder`` rows with ``data_source = '020001'``
        whose ``fund_id`` is a ``matched_id`` in ``base_public.id_match``
        (``id_type = 1``, ``is_used = 1``) and that has no row from data source
        '020002'.  ``holder_type`` is translated via ``cls.hold_type_dict``.

        :return: the configured ``Stream``.
        """
        sql = "\
                SELECT fund_id, statistic_date, holder_type \
                FROM crawl_public.d_fund_holder  \
                WHERE data_source = '020001' \
                AND fund_id IN (SELECT matched_id FROM \
                base_public.id_match where id_type = 1 AND is_used = 1) \
                AND fund_id NOT in \
                (SELECT DISTINCT fund_id FROM crawl_public.d_fund_holder \
                WHERE data_source = '020002')"

        source = MysqlInput(ENGINE_RD, sql)

        def _translate_holder(x):
            # Unknown holder types map to None (dict.get default).
            return cls.hold_type_dict.get(x)

        translator = transform.ValueMap({"holder_type": _translate_holder})

        kept = ("fund_id", "holder_type", "statistic_date")
        selector = transform.MapSelectKeys({name: name for name in kept})

        return Stream(source, transform=[translator, selector])

Ejemplo n.º 12

0

Mostrar archivo

Archivo: fund_nv.py Proyecto: dxcv/fund

    def stream_020002(cls):
        """Build the stream of source-020002 NAV rows updated in ``[cls.start, cls.end]``.

        Joins ``base_public.fund_info`` to ``data_test.fund_nv_source_test`` and
        keeps rows (``data_source = '020002'``, ``is_used = 1``) whose
        ``(fund_id, statistic_date)`` pair has an ``update_time`` between
        ``cls.start`` and ``cls.end``.

        :return: the configured ``Stream``.
        """
        sql = "SELECT fi.fund_id, fi.fund_name, fns.data_source, \
                fns.statistic_date, fns.nav, fns.added_nav \
                FROM base_public.fund_info fi \
                JOIN data_test.fund_nv_source_test fns ON fi.fund_id = fns.fund_id \
                JOIN ( \
                SELECT fund_id, statistic_date  \
                FROM data_test.fund_nv_source_test WHERE update_time BETWEEN '{start}' AND '{end}' \
                GROUP BY fund_id, statistic_date \
                ) fupt ON fns.fund_id = fupt.fund_id AND fns.statistic_date = fupt.statistic_date \
                WHERE fns.data_source = '020002' AND fns.is_used = 1".format(start=cls.start, end=cls.end)

        source = MysqlInput(ENGINE_RD, sql)

        # Identity mapping: all selected columns keep their names.
        kept = (
            "fund_id",
            'data_source',
            'statistic_date',
            'nav',
            'added_nav',
            "fund_name",
        )
        selector = transform.MapSelectKeys({name: name for name in kept})

        return Stream(source, transform=[selector])

Ejemplo n.º 13

0

Mostrar archivo

    def stream_securities(cls):
        """Build the stream that cleans ``x_fund_info_securities`` (source 010004).

        Joins ``base.id_match`` rows (``source = '010004'``, ``is_used = 1``,
        ``id_type = 1``; one per ``matched_id``) to the maximum-``version`` rows
        of ``crawl_private.x_fund_info_securities``.  String ``orientation_amac``
        values go through ``cls.sub_wrong_to_none``; ``Dropna(axis=0, how="any")``
        runs before the columns are renamed to ``fund_id`` and
        ``investment_range``.

        :return: the configured ``Stream``.
        """
        sql = " \
               SELECT id.matched_id, ac.orientation_amac FROM ( \
                SELECT A.matched_id, A.source_id FROM  \
                (SELECT matched_id, source_id, entry_time FROM base.id_match \
                WHERE source = '010004' and is_used = 1 and id_type = 1 ORDER BY entry_time DESC) \
                 AS A  GROUP BY A.matched_id) AS id  \
                JOIN \
                (SELECT * FROM (SELECT MAX(version) mm ,fund_id as maid FROM crawl_private.x_fund_info_securities \
                GROUP BY fund_id) AS p \
                JOIN crawl_private.x_fund_info_securities as ff \
                on ff.fund_id = p.maid and ff.version = p.mm) AS ac \
                ON ac.maid = id.source_id"

        source = MysqlInput(ENGINE_RD, sql)

        def _clean_orientation(x):
            # Only exact ``str`` values are cleaned; other values pass through.
            if type(x) is str:
                return cls.sub_wrong_to_none(x)
            return x

        steps = [
            transform.ValueMap({'orientation_amac': _clean_orientation}),
            transform.Dropna(how="any", axis=0),
            transform.MapSelectKeys({
                "matched_id": "fund_id",
                "orientation_amac": "investment_range",
            }),
        ]

        return Stream(source, transform=steps)

Ejemplo n.º 14

0

Mostrar archivo

    def stream_020002_investment_years(cls):
        """Build the stream that cleans ``investment_years`` from ``d_person_info``.

        Joins ``base.id_match`` (``id_type = 3``, ``is_used = 1``,
        ``source = '020002'``) to the maximum-``version`` rows of
        ``crawl_private.d_person_info``.  String ``investment_years`` values go
        through ``cls.sub_wrong_to_none``; ``matched_id`` is renamed to
        ``person_id``; ``Dropna(how="any", axis=0)`` runs last.

        :return: the configured ``Stream``.
        """
        sql = "select idm.matched_id, dd.investment_years FROM \
                (SELECT matched_id,source,source_id FROM base.id_match \
                where id_type = 3 and is_used = 1 AND source = '020002') as idm \
                JOIN \
                (SELECT * FROM \
                (select MAX(version) as mm,person_id as pi,source_id as si  \
                FROM crawl_private.d_person_info GROUP BY person_id) as ma \
                JOIN crawl_private.d_person_info as dpd \
                ON dpd.person_id = ma.pi and ma.mm = dpd.version) dd \
                ON dd.pi = idm.source_id \
                AND dd.si = idm.source"

        source = MysqlInput(ENGINE_RD, sql)

        def _clean_years(x):
            # Only exact ``str`` values are cleaned; other values pass through.
            if type(x) is str:
                return cls.sub_wrong_to_none(x)
            return x

        steps = [
            transform.ValueMap({"investment_years": _clean_years}),
            transform.MapSelectKeys({
                "matched_id": "person_id",
                'investment_years': 'investment_years',
            }),
            transform.Dropna(axis=0, how="any"),
        ]

        return Stream(source, transform=steps)

Ejemplo n.º 15

0

Mostrar archivo

Archivo: fund_description.py Proyecto: dxcv/fund

    def stream_020005(cls):
        """Build the stream that cleans ``d_fund_info`` description fields.

        Joins ``base.id_match`` (``id_type = 1``, ``is_used = 1``,
        ``source = '020001'``) to the maximum-``version`` rows of
        ``crawl_private.d_fund_info`` with ``source_id = '020001'``.  String
        ``open_date`` and ``locked_time_limit`` values go through
        ``cls.sub_wrong_to_none``.  Only ``fund_id`` (from ``matched_id``),
        ``locked_time_limit``, ``open_date`` and ``source_id`` are kept; the fee
        and amount columns fetched by the query are not part of the output.

        :return: the configured ``Stream``.
        """
        sql = " \
                SELECT idm.matched_id, dfi.open_date, dfi.locked_time_limit, dfi.min_purchase_amount, \
                dfi.min_append_amount,dfi.fee_subscription,dfi.fee_redeem, dfi.fee_manage,dfi.duration, \
                dfi.fee_pay, dfi.source_id \
                FROM \
                (SELECT matched_id, source_id FROM base.id_match where id_type=1 and is_used=1 AND source='020001' GROUP BY matched_id) as idm \
                JOIN \
                (SELECT MAX(version) maxversion, fund_id FROM crawl_private.d_fund_info WHERE source_id = '020001' GROUP BY fund_id) as ma \
                ON idm.source_id = ma.fund_id \
                JOIN crawl_private.d_fund_info as dfi \
                on dfi.version = ma.maxversion and dfi.fund_id = ma.fund_id"

        source = MysqlInput(ENGINE_RD, sql)

        # One independent cleaner per column; only exact ``str`` values are cleaned.
        cleaner = transform.ValueMap({
            column: (lambda x: x if type(x) is not str else cls.sub_wrong_to_none(x))
            for column in ("open_date", "locked_time_limit")
        })

        selector = transform.MapSelectKeys({
            "matched_id": "fund_id",
            'locked_time_limit': 'locked_time_limit',
            'open_date': 'open_date',
            'source_id': 'source_id',
        })

        return Stream(source, transform=[cleaner, selector])

Ejemplo n.º 16

0

Mostrar archivo

Archivo: fund_position_stock_main.py Proyecto: dxcv/fund

    def stream_020001(cls, fund_id):
        """Build the stream that cleans stock positions from ``d_fund_position`` (020001).

        Joins ``crawl_public.d_fund_position`` (``type = '股票'``,
        ``data_source = '020001'``) to ``base_public.fund_asset_scale`` on fund
        and statistic date.  ``quantity``, ``scale`` and ``proportion`` go
        through ``sub_wrong_to_none``; ``quantity`` and ``scale`` then go through
        ``cls.clean_amount`` and ``float``; string ``proportion`` values are
        converted to ``float`` and divided by 100; string ``total_asset`` values
        are converted to ``float``.

        :param fund_id: fund id(s) rendered with ``SQL.values4sql`` into an
            ``IN`` restriction; pass ``None`` for no restriction.
        :return: the configured ``Stream``.
        """

        sql = " \
                SELECT dp.fund_id, dp.fund_name, dp.data_source, dp.statistic_date, dp.subject_id, dp.subject_name,  \
                dp.quantity, dp.scale, dp.proportion, fa.total_asset FROM  \
                crawl_public.d_fund_position  as dp \
                JOIN base_public.fund_asset_scale AS fa \
                ON dp.fund_id = fa.fund_id and dp.statistic_date=fa.statistic_date \
                WHERE  dp.type= '股票' AND dp.data_source= '020001'"

        if fund_id is not None:
            fids = SQL.values4sql(fund_id)
            # Leading space: the base query ends right after the closing quote of
            # '020001', so without it the clause would be glued onto the literal.
            sql += " AND dp.fund_id IN {fids}".format(fids=fids)

        inp = MysqlInput(ENGINE_RD, sql)

        # Pass 1: generic clean-up of the numeric text columns.
        vm = transform.ValueMap({
            "quantity":
            lambda x: sub_wrong_to_none(x),
            "scale":
            lambda x: sub_wrong_to_none(x),
            "proportion":
            lambda x: sub_wrong_to_none(x),
            "total_asset":
            lambda x: float(x) if type(x) is str else x
        })

        # Pass 2: amount-specific clean-up.
        vm1 = transform.ValueMap({
            "quantity": lambda x: cls.clean_amount(x),
            "scale": lambda x: cls.clean_amount(x)
        })

        # Pass 3: numeric conversion; string proportions are divided by 100.
        vm2 = transform.ValueMap({
            "quantity":
            lambda x: float(x),
            "scale":
            lambda x: float(x),
            "proportion":
            lambda x: float(x) / 100 if type(x) is str else x
        })

        sk = transform.MapSelectKeys({
            "fund_id": "fund_id",
            "fund_name": "fund_name",
            "data_source": "data_source",
            "statistic_date": "statistic_date",
            "subject_id": "subject_id",
            "subject_name": "subject_name",
            # "quantity" is cleaned above but is not selected into the output.
            "scale": "scale",
            "proportion": "proportion_net",
            "total_asset": "asset_scale"
        })

        s = Stream(inp, transform=[vm, vm1, vm2, sk])
        return s

Ejemplo n.º 17

0

Mostrar archivo

Archivo: fund_nv_source.py Proyecto: dxcv/fund

    def stream_02000x(cls):
        """Build the stream of crawled NAV rows updated in ``[cls.start, cls.end]``.

        Joins ``base_public.id_match`` (``id_type = 1``, ``is_used = 1``) to
        ``crawl_public.d_fund_nv`` on source id and data source, and to
        ``base_public.fund_info`` for the fund name, keeping rows whose
        ``update_time`` lies between ``cls.start`` and ``cls.end``.
        ``matched_id`` is renamed to ``fund_id``.

        :return: the configured ``Stream``.
        """
        sql = " \
                SELECT idm.matched_id, df.data_source, df.statistic_date, \
                df.nav, df.added_nav, fi.fund_name \
                FROM (SELECT matched_id,source_id,data_source \
                FROM base_public.id_match WHERE id_type = 1 and is_used = 1) AS idm \
                JOIN \
                crawl_public.d_fund_nv AS df \
                on idm.source_id = df.fund_id \
                AND idm.data_source = df.data_source \
                JOIN base_public.fund_info as fi \
                ON fi.fund_id = df.fund_id \
                WHERE df.update_time BETWEEN '{start}' \
                AND '{end}' and is_used = 1".format(start=cls.start, end=cls.end)

        source = MysqlInput(ENGINE_RD, sql)

        selector = transform.MapSelectKeys({
            "matched_id": "fund_id",
            'data_source': 'data_source',
            'statistic_date': 'statistic_date',
            'nav': 'nav',
            'added_nav': 'added_nav',
            "fund_name": "fund_name",
        })

        return Stream(source, transform=[selector])

Ejemplo n.º 18

0

Mostrar archivo

Archivo: fund_nv_source_incremental.py Proyecto: dxcv/fund

def gfundnv2src_whole(fund_id=None, source_id=None, update_time=None):
    """Build the stream that copies in-use ``GFundNv`` rows into the ``FundNvDataSource`` layout.

    :param fund_id: when not ``None``, restrict to this ``GFundNv.fund_id``.
    :param source_id: compared with ``GFundNv.source_id`` unconditionally.
    :param update_time: when not ``None``, keep rows with
        ``GFundNv.update_time >= update_time``.
    :return: the configured ``Stream``.
    """
    session = dbsession(bind=engine_r)
    # NOTE(review): the source_id filter is applied even when source_id is None
    # (the default), which compares the column against NULL -- confirm intended.
    stmt = session.query(GFundNv).filter(
        GFundNv.source_id == source_id,
        GFundNv.is_used == 1,
    ).with_entities(
        # source_id used to be listed twice; one entry is enough for the
        # key mapping below.
        GFundNv.fund_id,
        GFundNv.source_id,
        GFundNv.statistic_date,
        GFundNv.nav,
        GFundNv.added_nav,
        GFundNv.adjusted_nav,
    )
    if update_time is not None:
        stmt = stmt.filter(GFundNv.update_time >= update_time)
    if fund_id is not None:
        stmt = stmt.filter(GFundNv.fund_id == fund_id)

    inp = MysqlInput(session.bind, stmt)

    # Rename each source column to the matching FundNvDataSource column.
    km = transform.MapSelectKeys({
        GFundNv.fund_id.name: FundNvDataSource.fund_id.name,
        GFundNv.statistic_date.name: FundNvDataSource.statistic_date.name,
        GFundNv.source_id.name: FundNvDataSource.source_id.name,
        GFundNv.nav.name: FundNvDataSource.nav.name,
        GFundNv.added_nav.name: FundNvDataSource.added_nav.name,
        GFundNv.adjusted_nav.name: FundNvDataSource.adjusted_nav.name,
    })
    s = Stream(inp, (km, ))
    return s

Ejemplo n.º 19

0

Mostrar archivo

Archivo: bond_interest_type.py Proyecto: dxcv/fund

def stream_000001():
    """Build the stream that copies ``interest_type`` from ``YBondInfo`` to ``BondInfo``.

    Queries ``YBondInfo`` joined to ``BondInfo`` on ``bond_id``, keeps in-use
    rows, applies ``Dropna`` on the interest-type column and renames the
    columns to their ``BondInfo`` names.

    :return: the configured ``Stream``.
    """
    session = dbsession(bind=engine_r)

    # NOTE(review): the same is_used predicate appears twice; the second one may
    # have been meant for another table -- confirm.
    in_use = and_(YBondInfo.is_used == 1,
                  YBondInfo.is_used == 1)
    joined = session.query(YBondInfo).join(
        BondInfo, YBondInfo.bond_id == BondInfo.bond_id)
    stmt = joined.filter(in_use).with_entities(
        YBondInfo.bond_id, YBondInfo.interest_type)

    source = MysqlInput(session.bind, stmt)

    drop_missing = transform.Dropna(subset=[YBondInfo.interest_type.name])

    # source_id is mapped here although the query above does not select it.
    rename = transform.MapSelectKeys({
        YBondInfo.bond_id.name: BondInfo.bond_id.name,
        YBondInfo.source_id.name: None,
        YBondInfo.interest_type.name: BondInfo.interest_type.name,
    })

    return Stream(source, (drop_missing, rename))

Ejemplo n.º 20

0

Mostrar archivo

Archivo: fund_nv_source_whole.py Proyecto: dxcv/fund

def tfundnv2src(fund_id=None, source_id=None, update_time=None):
    """Build the stream that copies matched ``TFundNv`` rows into the ``FundNvDataSource`` layout.

    ``TFundNv`` is joined to ``IdMatch`` (``id_type == 1``, ``is_used == 1``) on
    fund id and source.  Rows must be in use, match ``source_id``, be dated
    after ``MIN_DATE`` and have ``nav < 2000``.  An ``added_nav`` equal to 0 is
    replaced by ``None``.

    :param fund_id: when not ``None``, restrict to this ``TFundNv.fund_id``.
    :param source_id: compared with ``TFundNv.source_id`` unconditionally.
    :param update_time: when not ``None``, keep rows with
        ``TFundNv.update_time >= update_time``.
    :return: the configured ``Stream``.
    """
    session = dbsession(bind=engine_r)

    match_on = and_(
        IdMatch.source_id == TFundNv.fund_id,
        IdMatch.source == TFundNv.source_id,
        IdMatch.id_type == 1,
        IdMatch.is_used == 1,
    )
    stmt = session.query(TFundNv).join(IdMatch, match_on)
    stmt = stmt.filter(
        TFundNv.is_used == 1,
        TFundNv.source_id == source_id,
        TFundNv.statistic_date > MIN_DATE,
        TFundNv.nav < 2000,
    )
    stmt = stmt.with_entities(
        IdMatch.matched_id,
        TFundNv.fund_id,
        TFundNv.statistic_date,
        TFundNv.source_id,
        TFundNv.nav,
        TFundNv.added_nav,
    )

    # Optional narrowing, applied in the same order as before.
    if update_time is not None:
        stmt = stmt.filter(TFundNv.update_time >= update_time)
    if fund_id is not None:
        stmt = stmt.filter(TFundNv.fund_id == fund_id)

    source = MysqlInput(session.bind, stmt)

    def _zero_to_none(x):
        # A zero added_nav is replaced by None.
        if x == 0:
            return None
        return x

    blank_zero = transform.ValueMap({TFundNv.added_nav.name: _zero_to_none})

    # The matched id (not the crawled fund_id) becomes the output fund id.
    rename = transform.MapSelectKeys({
        IdMatch.matched_id.name: FundNvDataSource.fund_id.name,
        TFundNv.statistic_date.name: FundNvDataSource.statistic_date.name,
        TFundNv.source_id.name: FundNvDataSource.source_id.name,
        TFundNv.nav.name: FundNvDataSource.nav.name,
        TFundNv.added_nav.name: FundNvDataSource.added_nav.name,
    })

    return Stream(source, (blank_zero, rename))

Ejemplo n.º 21

0

Mostrar archivo

Archivo: bond_info.py Proyecto: dxcv/fund

def stream_010001():
    """Build the stream of basic bond information (Shanghai Stock Exchange).

    Queries ``DBondInfo`` rows with ``source_id == "010001"`` whose ``bond_id``
    matches the LIKE pattern ``%\\.__`` and renames the selected columns to
    their ``BondInfo`` names.

    :return: the configured ``Stream`` named ``"d_bond_info_sse"``.
    """
    session = dbsession(bind=engine_r)

    # Build the query on the session created above (previously a second,
    # unrelated ``Session()`` was instantiated just for this statement).
    # The LIKE pattern is a raw string: "\." in a plain literal is an invalid
    # escape sequence; the runtime value is unchanged.
    stmt = session.query(DBondInfo).filter(
        and_(DBondInfo.source_id == "010001",
             DBondInfo.bond_id.like(r"%\.__"))).with_entities(
                 DBondInfo.bond_id,
                 DBondInfo.source_id,
                 DBondInfo.bond_full_name,  # name data
                 DBondInfo.issue_price,
                 DBondInfo.issue_amount,  # issue price data
                 DBondInfo.coupon_rate,
                 DBondInfo.par_value,  # terms data
                 DBondInfo.maturity_date,
                 DBondInfo.issue_date_start,
                 DBondInfo.issue_date_end,
                 DBondInfo.listing_date,  # date data
                 DBondInfo.consigner,
                 DBondInfo.consignee,  # related institution data
             )

    inp = MysqlOrmInput(session.bind, stmt)

    # bond_name is mapped below although the query above does not select it.
    km = transform.MapSelectKeys({
        DBondInfo.bond_id.name:
        BondInfo.bond_id.name,
        DBondInfo.source_id.name:
        None,
        DBondInfo.bond_name.name:
        BondInfo.bond_name.name,
        DBondInfo.bond_full_name.name:
        BondInfo.bond_full_name.name,
        DBondInfo.maturity_date.name:
        BondInfo.maturity_date.name,
        DBondInfo.issue_price.name:
        BondInfo.issue_price.name,
        DBondInfo.issue_amount.name:
        BondInfo.issue_amount.name,
        DBondInfo.par_value.name:
        BondInfo.par_value.name,
        DBondInfo.coupon_rate.name:
        BondInfo.coupon_rate.name,
        DBondInfo.issue_date_start.name:
        BondInfo.issue_date_start.name,
        DBondInfo.issue_date_end.name:
        BondInfo.issue_date_end.name,
        DBondInfo.listing_date.name:
        BondInfo.listing_date.name,
        DBondInfo.consigner.name:
        BondInfo.consigner.name,
        DBondInfo.consignee.name:
        BondInfo.consignee.name,
    })

    stream = Stream(inp, (km, ), name="d_bond_info_sse")

    return stream

Ejemplo n.º 22

0

Mostrar archivo

Archivo: fund_nv_source_whole.py Proyecto: dxcv/fund

def gfundnv2src(fund_id=None, source_id=None, update_time=None):
    """Build the stream that maps GFundNv rows onto FundNvDataSource columns.

    The base query keeps rows with ``is_used == 1``, ``statistic_date``
    later than ``MIN_DATE`` and ``nav`` below 2000; every argument that is
    not ``None`` adds one more filter on top of that.

    Args:
        fund_id: if given, only rows with this ``fund_id`` are read.
        source_id: if given, only rows with this ``source_id`` are read.
        update_time: if given, only rows whose ``update_time`` is at or
            after this value are read.

    Returns:
        A ``Stream`` over the query whose transforms replace an
        ``added_nav`` of 0 with ``None`` and then pass the columns through
        ``MapSelectKeys`` keyed by the ``FundNvDataSource`` column names.
    """
    session = dbsession(bind=engine_r)

    # Select every column exactly once; a repeated column (source_id used to
    # be listed twice) can surface as a duplicate column label in the result.
    stmt = session.query(GFundNv).filter(
        GFundNv.is_used == 1,
        GFundNv.statistic_date > MIN_DATE,
        GFundNv.nav < 2000,  # NOTE(review): presumably a sanity cap on nav -- confirm
    ).with_entities(
        GFundNv.fund_id,
        GFundNv.source_id,
        GFundNv.statistic_date,
        GFundNv.nav,
        GFundNv.added_nav,
        GFundNv.adjusted_nav,
    )

    # Optional narrowing filters, applied only for the arguments supplied.
    if update_time is not None:
        stmt = stmt.filter(GFundNv.update_time >= update_time)
    if fund_id is not None:
        stmt = stmt.filter(GFundNv.fund_id == fund_id)
    if source_id is not None:
        stmt = stmt.filter(GFundNv.source_id == source_id)

    inp = MysqlInput(session.bind, stmt)

    # An added_nav of exactly 0 is turned into None.
    vm = transform.ValueMap({
        GFundNv.added_nav.name: lambda x: None if x == 0 else x
    })

    km = transform.MapSelectKeys({
        GFundNv.fund_id.name: FundNvDataSource.fund_id.name,
        GFundNv.statistic_date.name: FundNvDataSource.statistic_date.name,
        GFundNv.source_id.name: FundNvDataSource.source_id.name,
        GFundNv.nav.name: FundNvDataSource.nav.name,
        GFundNv.added_nav.name: FundNvDataSource.added_nav.name,
        GFundNv.adjusted_nav.name: FundNvDataSource.adjusted_nav.name,
    })
    return Stream(inp, [vm, km])

Ejemplo n.º 23

0

Mostrar archivo

    def conflu_place(cls):
        """Build the confluence for the region, prov, city and area columns.

        Reads ``org_id`` and ``address`` from ``TEST_TABLE`` (rows with a
        non-NULL address only), runs ``cls._clean_place`` on the address and
        spreads elements 0..3 of its result over the four place columns.

        Returns:
            Confluence: wraps the single stream built here.
        """
        query = (
            "SELECT org_id, address FROM {tb_test} "
            "WHERE address IS NOT NULL"
        ).format(tb_test=TEST_TABLE)
        source = MysqlInput(ENGINE_RD, query)

        # Target columns, in the order their values sit in the "__tmp" value.
        place_columns = (
            OrgInfo.region.name,
            OrgInfo.prov.name,
            OrgInfo.city.name,
            OrgInfo.area.name,
        )

        def element_at(position):
            # One-argument picker for a fixed position of the "__tmp" value.
            return lambda parts: parts[position]

        # Step 1: store the cleaned address under the scratch key "__tmp".
        parse_address = transform.ValueMap({
            "__tmp": (lambda address: cls._clean_place(address), "address")
        })

        # Step 2: fan the scratch value out into the four place columns.
        spread_parts = transform.ValueMap({
            column: (element_at(position), "__tmp")
            for position, column in enumerate(place_columns)
        })

        # Step 3: keep org_id plus the four place columns, names unchanged.
        keep_columns = transform.MapSelectKeys(
            dict.fromkeys((OrgInfo.org_id.name,) + place_columns)
        )

        place_stream = Stream(inp=source, transform=[parse_address, spread_parts, keep_columns]) \
            if False else Stream(source, transform=[parse_address, spread_parts, keep_columns])

        return Confluence(place_stream)

Ejemplo n.º 24

0

Mostrar archivo

Archivo: fund_org_mapping_xfi.py Proyecto: dxcv/fund

    def stream_consultant_000001(cls):
        """Build the stream of fund/org mappings with ``org_type_code = 1``.

        Joins ``crawl_private.y_fund_org_mapping`` with ``base.org_info``,
        sets the constant ``org_type`` label, keeps the mapping columns under
        their own names and drops rows missing ``fund_id``, ``org_id`` or
        ``org_type_code``.

        Returns:
            Stream: the input with the three transforms attached.
        """
        query = " ".join([
            "SELECT fund_id, fund_name, oi.org_id, oi.org_name, org_type_code, start_date, end_date, is_current",
            "FROM crawl_private.y_fund_org_mapping yfom",
            "JOIN base.org_info oi ON yfom.org_id = oi.org_id",
            "WHERE org_type_code = 1 AND is_used = 1",
        ])
        source = MysqlInput(ENGINE_C, query)

        # Constant label written to every row ("投资顾问": investment adviser).
        add_org_type = transform.ValueMap({"org_type": "投资顾问"})

        # Columns kept as-is (a None target leaves the name unchanged).
        kept_columns = (
            "fund_id",
            "fund_name",
            "org_id",
            "org_name",
            "org_type",
            "org_type_code",
            "start_date",
            "end_date",
            "is_current",
        )
        select_columns = transform.MapSelectKeys(dict.fromkeys(kept_columns))

        # Rows lacking any of these three columns are discarded.
        required = [
            FundOrgMapping.fund_id.name,
            FundOrgMapping.org_id.name,
            FundOrgMapping.org_type_code.name,
        ]
        drop_incomplete = transform.Dropna(subset=required)

        return Stream(source, transform=[add_org_type, select_columns, drop_incomplete])

Ejemplo n.º 25

0

Mostrar archivo

    def conflu_is_member(cls):
        """Build the confluence for the is_member and member_type columns.

        Both columns are recomputed from ``is_reg_now``, ``is_member``,
        ``member_type`` and ``org_id`` read from ``TEST_TABLE``.

        Returns:
            Confluence: wraps the single stream built here.
        """

        def resolve_is_member(is_reg, is_member):
            # Not currently registered ("否" = no) forces is_member to "否".
            if is_reg == "否":
                return "否"
            return is_member

        def resolve_member_type(is_reg, is_member, member_type, org_id):
            if is_reg == "否":
                # org_id starting with "P": "注销备案资格" (registration
                # cancelled); otherwise "未备案" (not registered).
                return "注销备案资格" if org_id[0] == "P" else "未备案"
            if is_reg == "是" and is_member == "否":
                # Registered but not a member: "尚未取得会员资格"
                # (membership not yet obtained).
                return "尚未取得会员资格"
            # Every other combination keeps the stored member_type.
            return member_type

        query = "SELECT org_id, is_reg_now, is_member, member_type FROM {tb_test} ".format(tb_test=TEST_TABLE)
        source = MysqlInput(ENGINE_RD, query)

        # is_member is listed before member_type, as in the original mapping.
        recompute = transform.ValueMap({
            OrgInfo.is_member.name: (resolve_is_member, "is_reg_now", "is_member"),
            OrgInfo.member_type.name: (
                resolve_member_type, "is_reg_now", "is_member", "member_type", "org_id"
            ),
        })

        select_columns = transform.MapSelectKeys({
            "org_id": OrgInfo.org_id.name,
            OrgInfo.is_member.name: None,
            OrgInfo.member_type.name: None,
        })

        return Confluence(Stream(source, [recompute, select_columns]))

Ejemplo n.º 26

0

Mostrar archivo

Archivo: person_info.py Proyecto: dxcv/fund

    def stream_020003(cls):
        """
        Clean d_person_info.

        Reads the highest-``version`` row per ``person_id`` from
        ``crawl_private.d_person_info``, joined to ``base.id_match``
        (``id_type = 3``, ``source = '020003'``, ``is_used = 1``) to obtain
        ``matched_id``.

        Returns:
            Stream: rows with ``person_id`` (taken from ``matched_id``),
            ``background`` looked up in ``cls.BACKGROUND``, ``person_name``
            and ``person_name_py``.
        """

        sql = "SELECT im.matched_id, person_name, background FROM crawl_private.d_person_info tb_main " \
              "JOIN (SELECT person_id, MAX(version) latest_ver FROM crawl_private.d_person_info GROUP BY person_id) tb_latest " \
              "ON tb_main.version = tb_latest.latest_ver AND tb_main.person_id = tb_latest.person_id " \
              "JOIN base.id_match im ON im.source_id = tb_main.person_id " \
              "AND im.id_type = 3 AND im.source = '020003' AND im.is_used = 1 "

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            # Only string values are looked up in BACKGROUND; anything else
            # (e.g. None) passes through untouched. isinstance() is the
            # idiomatic type check and also accepts str subclasses.
            "background":
            lambda x: cls.BACKGROUND.get(x) if isinstance(x, str) else x,
            # Upper-cased concatenation of the first character of each item
            # returned by py(..., style=FIRST_LETTER). The inner loop variable
            # has its own name so it no longer shadows the lambda parameter.
            "person_name_py": (lambda name: "".join(
                [item[0] for item in py(name, style=py_style.FIRST_LETTER)]).upper(),
                               "person_name"),
        })

        sk = transform.MapSelectKeys({
            "matched_id": "person_id",
            'background': 'background',
            'person_name': 'person_name',
            "person_name_py": "person_name_py"
        })
        s = Stream(inp, transform=[vm, sk])
        return s

Ejemplo n.º 27

0

Mostrar archivo

Archivo: quantity.py Proyecto: dxcv/fund

    def stream_020003(cls):
        """
        Clean d_fund_position (020003).

        Joins ``base_public.fund_position_stock`` with
        ``crawl_public.d_fund_position`` on fund, date and subject, and
        normalises ``quantity`` to a float.

        NOTE(review): this title says 020003 but the SQL below filters
        ``dfp.data_source = '020001'`` -- confirm which one is intended.

        Returns:
            Stream: rows with ``fund_id``, ``data_source``,
            ``statistic_date``, ``subject_id`` and ``quantity``.
        """

        sql = " \
               SELECT fps.fund_id,fps.statistic_date,fps.subject_id,dfp.quantity, fps.data_source  \
              FROM base_public.fund_position_stock AS fps \
              JOIN crawl_public.d_fund_position AS dfp \
              ON fps.fund_id = dfp.fund_id AND fps.statistic_date = dfp.statistic_date \
              AND fps.subject_id = dfp.subject_id \
              WHERE dfp.data_source = '020001' AND dfp.quantity is not NULL"

        inp = MysqlInput(ENGINE_RD, sql)

        vm = transform.ValueMap({
            "quantity": lambda x: cls.sub_wrong_to_none(x)
        })

        # sub_wrong_to_none presumably yields None for unusable values (per
        # its name -- verify against the helper). float(None) raises
        # TypeError, so None is passed through instead of being converted.
        vm2 = transform.ValueMap({
            "quantity": lambda x: None if x is None else float(x)
        })

        sk = transform.MapSelectKeys({
            "fund_id": "fund_id",
            "data_source": "data_source",
            "statistic_date": "statistic_date",
            "subject_id": "subject_id",
            "quantity": "quantity"
        })

        s = Stream(inp, transform=[vm, vm2, sk])
        return s

Ejemplo n.º 28

0

Mostrar archivo

Archivo: person_info.py Proyecto: dxcv/fund

    def stream_y_person_info(cls):
        """
        Clean y_person_info.

        Reads person attributes from ``crawl_private.y_person_info`` and adds
        ``person_name_py``, derived from ``person_name``.

        Returns:
            Stream: the input with the value map and column selection
            attached.
        """
        query = (
            "SELECT person_id, person_name, gender, background, education,"
            "graduate_school, investment_years FROM crawl_private.y_person_info"
        )
        source = MysqlInput(ENGINE_RD, query)

        def initials(name):
            # First character of each item returned by py(...), joined and
            # upper-cased.
            pieces = [item[0] for item in py(name, style=py_style.FIRST_LETTER)]
            return "".join(pieces).upper()

        add_initials = transform.ValueMap({
            "person_name_py": (lambda x: initials(x), "person_name"),
        })

        # Columns kept under their own names.
        # NOTE(review): "education" is selected by the query but is not part
        # of this list -- confirm whether that is intentional.
        kept = (
            "person_id",
            "person_name",
            "person_name_py",
            "gender",
            "background",
            "graduate_school",
            "investment_years",
        )
        select_columns = transform.MapSelectKeys({column: column for column in kept})

        return Stream(source, transform=[add_initials, select_columns])

Ejemplo n.º 29

0

Mostrar archivo

Archivo: fund_org_mapping_03and04.py Proyecto: dxcv/fund

    def stream_04xxxx_type2(cls):
        """
        Clean id_match: fund managers for the 04xxxx sources.

        Takes the distinct ``matched_id``/``source`` pairs of
        ``base.id_match`` (``id_type = 1``, ``is_used = 1``, source like
        ``040%``) that have no ``org_type_code = 2`` row in
        ``base.fund_org_mapping`` yet, and resolves the organisation through
        ``data_test.source_info_org`` and ``base.org_info``.

        Returns:
            Stream: rows with ``fund_id``, ``org_name``, ``org_type``,
            ``org_type_code`` and ``org_id``.
        """
        query = (
            "SELECT idm.matched_id,oi.org_id,oi.org_full_name "
            "FROM (SELECT DISTINCT matched_id,source FROM base.id_match "
            "WHERE id_type = 1 AND is_used = 1 AND source LIKE '040%%' "
            "AND matched_id NOT IN (SELECT fund_id FROM base.fund_org_mapping WHERE org_type_code=2)) idm "
            "JOIN data_test.source_info_org sig ON idm.source = sig.source_id "
            "JOIN base.org_info oi ON sig.org_id = oi.org_id"
        )
        source = MysqlInput(ENGINE_RD, query)

        # Constants stamped on every row ("基金管理人": fund manager).
        constants = dict(org_type="基金管理人", org_type_code=2)
        add_constants = transform.ValueMap(constants)

        # Source column -> output column, in output order.
        renames = dict([
            ("matched_id", "fund_id"),
            ("org_full_name", "org_name"),
            ("org_type", "org_type"),
            ("org_type_code", "org_type_code"),
            ("org_id", "org_id"),
        ])
        select_columns = transform.MapSelectKeys(renames)

        return Stream(source, transform=[add_constants, select_columns])

Ejemplo n.º 30

0

Mostrar archivo

    def stream_020003_graduate_school(cls):
        """
        Clean d_person_info: graduate_school only.

        Pairs each ``base.id_match`` entry (``id_type = 3``, ``is_used = 1``,
        ``source = '020003'``) with the highest-``version`` row of the same
        person in ``crawl_private.d_person_info``.

        Returns:
            Stream: ``person_id`` (taken from ``matched_id``) and
            ``graduate_school``; rows with any missing value are dropped.
        """

        # The literal is kept verbatim: its backslash continuations embed the
        # leading whitespace of each line into the SQL text.
        query = "select idm.matched_id, dd.graduate_school FROM \
                (SELECT matched_id,source,source_id FROM base.id_match \
                where id_type = 3 and is_used = 1 AND source = '020003') as idm \
                JOIN \
                (SELECT * FROM \
                (select MAX(version) as mm,person_id as pi,source_id as si  \
                FROM crawl_private.d_person_info GROUP BY person_id) as ma \
                JOIN crawl_private.d_person_info as dpd \
                ON dpd.person_id = ma.pi and ma.mm = dpd.version) dd \
                ON dd.pi = idm.source_id \
                AND dd.si = idm.source"

        source = MysqlInput(ENGINE_RD, query)

        renames = {}
        renames["matched_id"] = "person_id"
        renames['graduate_school'] = 'graduate_school'
        select_columns = transform.MapSelectKeys(renames)

        # Discard every row that has a missing value in any column.
        drop_missing = transform.Dropna(axis=0, how='any')

        return Stream(source, transform=[select_columns, drop_missing])