def get_minerals_from_hive():
    """Query a capped sample of stage-1 competitive-filtering rows and store it.

    Selects ``customer_attr``, ``target_attr``, ``customer_item_id`` and
    ``target_item_id`` from ``t_elengjing.competitive_filtering_stage_1``
    (at most 10000 rows) through a ``Hive`` client built with ``env="idc"``,
    then hands the query result to ``pickle_dump`` under the name
    ``"raw_data"``.

    Returns:
        None. The only effects are the Hive query and the ``pickle_dump`` call.
    """
    # The statement text is passed to Hive exactly as written, including the
    # surrounding spaces and the trailing semicolon.
    statement = """ SELECT customer_attr, target_attr, customer_item_id, target_item_id FROM t_elengjing.competitive_filtering_stage_1 LIMIT 10000; """
    client = Hive(env="idc")
    # to_dict=False presumably requests plain row sequences rather than
    # dicts -- TODO confirm against the Hive wrapper.
    rows = client.query(sql=statement, to_dict=False)
    pickle_dump("raw_data", rows)
def get_gold_from_hive():
    """Query the whole stage-3 competitive-item training table and store it.

    Runs ``SELECT *`` on ``t_elengjing.competitive_item_train_stage_3``
    through a ``Hive`` client built with ``env="idc"`` and hands the query
    result to ``pickle_dump`` under the name ``"train_data"``.

    Returns:
        None. The only effects are the Hive query and the ``pickle_dump`` call.
    """
    statement = "SELECT * FROM t_elengjing.competitive_item_train_stage_3"
    client = Hive(env="idc")
    # No LIMIT clause here: every row of the table is requested.
    training_rows = client.query(statement, to_dict=False)
    pickle_dump("train_data", training_rows)
def get_data():
    """Count the 2016 rows of the diaper dataset and store the query result.

    Issues a ``COUNT(*)`` over
    ``das.ci_diaper2014to2016_0406_allplatform_distinct`` filtered to
    ``year = '2016'`` through a ``Hive`` client built with ``env="local"``,
    then hands whatever ``query`` returns to ``pickle_dump`` under the name
    ``"raw"``.

    Returns:
        None. The only effects are the Hive query and the ``pickle_dump`` call.
    """
    statement = "SELECT COUNT(*) AS cnt FROM das.ci_diaper2014to2016_0406_allplatform_distinct AS d WHERE d.year = '2016'"
    # query() is called with its default result format (no to_dict argument).
    result = Hive(env="local").query(statement)
    pickle_dump("raw", result)
def transform():
    """Build an id -> content mapping from the stored "raw" data and store it.

    Loads the object saved under ``"raw"`` via ``pickle_load``, iterates over
    it, and maps element ``[0]`` of each item to element ``[1]``. When the
    same key occurs more than once, the last occurrence wins. The resulting
    dict is handed to ``pickle_dump`` under the name ``"id_content"``.

    Returns:
        None. The only effects are the ``pickle_load`` and ``pickle_dump`` calls.
    """
    # NOTE(review): in this file "raw" is written by get_data(), whose query
    # yields a single COUNT(*) column; indexing [1] below assumes two-column
    # rows -- confirm which producer is expected to feed this step.
    id_to_content = {}
    for record in pickle_load("raw"):
        key, content = record[0], record[1]
        id_to_content[key] = content
    pickle_dump("id_content", id_to_content)