コード例 #1
0
def get_minerals_from_hive():
    """Run the stage-1 filtering query on Hive and store the result as "raw_data".

    Selects the attribute and item-id columns of
    ``t_elengjing.competitive_filtering_stage_1`` (capped by the query's
    ``LIMIT 10000``) through a ``Hive`` client created with ``env="idc"``,
    then passes whatever the query returns to ``pickle_dump`` under the
    name ``"raw_data"``.
    """
    statement = """
    SELECT
        customer_attr,
        target_attr,
        customer_item_id,
        target_item_id
    FROM t_elengjing.competitive_filtering_stage_1
    LIMIT 10000;
    """
    client = Hive(env="idc")
    # NOTE(review): to_dict=False presumably requests a non-dict result
    # format from the wrapper -- confirm against the Hive helper.
    rows = client.query(sql=statement, to_dict=False)
    pickle_dump("raw_data", rows)
コード例 #2
0
def get_gold_from_hive():
    """Fetch every row of the stage-3 training table and store it as "train_data".

    Queries ``t_elengjing.competitive_item_train_stage_3`` through a ``Hive``
    client created with ``env="idc"`` and hands the query result to
    ``pickle_dump`` under the name ``"train_data"``.
    """
    statement = "SELECT * FROM t_elengjing.competitive_item_train_stage_3"
    client = Hive(env="idc")
    # NOTE(review): to_dict=False presumably requests a non-dict result
    # format from the wrapper -- confirm against the Hive helper.
    training_rows = client.query(statement, to_dict=False)
    pickle_dump("train_data", training_rows)
コード例 #3
0
def get_data():
    """Count the 2016 rows of the diaper table on Hive and store the result as "raw".

    Runs a ``COUNT(*)`` query filtered on ``d.year = '2016'`` through a
    ``Hive`` client created with ``env="local"`` and passes the query result
    to ``pickle_dump`` under the name ``"raw"``.
    """
    # Adjacent literals are concatenated at compile time, so the statement
    # sent to Hive is a single line.
    statement = (
        "SELECT COUNT(*) AS cnt "
        "FROM das.ci_diaper2014to2016_0406_allplatform_distinct AS d "
        "WHERE d.year = '2016'"
    )
    client = Hive(env="local")
    result = client.query(statement)
    pickle_dump("raw", result)
コード例 #4
0
def transform():
    """Build a first-field -> second-field mapping from "raw" and store it as "id_content".

    Each record returned by ``pickle_load("raw")`` contributes one entry
    keyed by its element 0 with its element 1 as the value; when a key
    repeats, the later record wins. The resulting dict is passed to
    ``pickle_dump`` under the name ``"id_content"``.
    """
    # NOTE(review): the name "id_content" suggests records are
    # (id, content, ...) sequences -- confirm against the producer of "raw".
    mapping = {}
    for record in pickle_load("raw"):
        key = record[0]
        mapping[key] = record[1]
    pickle_dump("id_content", mapping)