distinct mobile from tmp_id_mob_tb ) A left outer join %s B on A.mobile = B.mobile ''' % (args.source, mobile_event_table) print sql hc.sql(sql).registerTempTable('tmp_mob_tb') print hc.sql('select * from tmp_mob_tb limit 10').show(5) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('-task', default='Test_id', help='project name') parser.add_argument('-flag_4_test', default=1, help='project name') parser.add_argument('-target_tb_prefix', default='base_table', help='project name') parser.add_argument('-test_tb', default='creditmodel.fasttext_score_dzn') args = parser.parse_args() args.source = 'base_table' if args.task == 'Test_id' else 'td' sql = ' select value, mobile, test_date as apply_dt from %s ' % args.test_tb hc.sql(sql).registerTempTable(args.source) hc.registerFunction('udf_entity_extract', udf_entity_extract) id_event_table = 'bigdata.idnumber_aggregation_events' mob_event_table = 'bigdata.mobile_aggregation_events' if args.flag_4_test == 1: main_offline_extract_data(args, hc, id_event_table, mob_event_table) else: args.source = 'online' main_4_online(args, hc)
# 导入联系人信息 # contact_src = sqlcontext.sql("select * from new_type.contact where time ='{time}' ".format(time=TIME)) # execute_func(load_data_to_hbase.insert_contact_to_hbase, contact_src, "insert Contact! ") # 导入message # message_src = sqlcontext.sql("select mid,content,platform_msg_id ," # "partyuid,useruid,timesent,media ,direction,msgtype," # "platform from new_type.message where time ='{time}' ".format(time=TIME)) # execute_func(load_data_to_hbase.insert_message_to_hbase, message_src, "insert message! ") # 导入group_msg group_msg_src = sqlcontext.sql( "select * from new_type.group_message where time ='{TIME}'".format( time=TIME)) execute_func(load_data_to_hbase.insert_groupmsg_to_hbase, group_msg_src, "insert groupmsg !") # 导入count 信息 if __name__ == '__main__': reload(sys) sys.setdefaultencoding('utf-8') conf = SparkConf() sc = SparkContext(conf=conf) sqlcontext = HiveContext(sc) shb = SparkHbaseConn(sc, sqlcontext) sqlcontext.registerFunction("nvl", lambda x, y: x if x is not None else y) execute_load_data()