Example #1
0
                    distinct mobile
                from tmp_id_mob_tb
                ) A
        left outer join %s B on A.mobile = B.mobile
    ''' % (args.source, mobile_event_table)
    print sql
    hc.sql(sql).registerTempTable('tmp_mob_tb')
    print hc.sql('select * from tmp_mob_tb limit 10').show(5)


if __name__ == "__main__":
    # Script entry point: parse the command-line options, expose the test
    # table as a temp table, register the entity-extraction UDF, then run
    # either the offline extraction or the online path.
    # NOTE(review): `hc` is not created in this block; it is presumably a
    # module-level Hive/SQL context defined earlier in the file -- confirm.
    parser = argparse.ArgumentParser()
    parser.add_argument('-task', default='Test_id', help='project name')
    # type=int is required here: without it a value supplied on the command
    # line is a string ('1'), which never equals the integer 1 tested below,
    # so the offline branch was only reachable through the default value.
    # A non-integer value is now rejected by argparse instead of silently
    # selecting the online path.
    parser.add_argument('-flag_4_test', type=int, default=1,
                        help='1 runs the offline extraction; '
                             'any other integer runs the online path')
    # NOTE(review): this help text looks copy-pasted from -task; the option
    # is not read anywhere in this block, so its meaning is left untouched.
    parser.add_argument('-target_tb_prefix',
                        default='base_table',
                        help='project name')
    parser.add_argument('-test_tb', default='creditmodel.fasttext_score_dzn')
    args = parser.parse_args()

    # The temp-table name depends on the task: 'base_table' for the default
    # 'Test_id' task, 'td' for everything else.
    args.source = 'base_table' if args.task == 'Test_id' else 'td'
    sql = ' select value, mobile, test_date as apply_dt from %s ' % args.test_tb
    hc.sql(sql).registerTempTable(args.source)
    hc.registerFunction('udf_entity_extract', udf_entity_extract)

    id_event_table = 'bigdata.idnumber_aggregation_events'
    mob_event_table = 'bigdata.mobile_aggregation_events'
    if args.flag_4_test == 1:
        main_offline_extract_data(args, hc, id_event_table, mob_event_table)
    else:
        # The online path overrides the source name chosen above.
        args.source = 'online'
        main_4_online(args, hc)
Example #2
0
    # 导入联系人信息
    # contact_src = sqlcontext.sql("select * from new_type.contact where time ='{time}' ".format(time=TIME))
    # execute_func(load_data_to_hbase.insert_contact_to_hbase, contact_src, "insert Contact! ")

    # 导入message
    # message_src = sqlcontext.sql("select mid,content,platform_msg_id ,"
    #                              "partyuid,useruid,timesent,media ,direction,msgtype,"
    #                              "platform from new_type.message where time ='{time}' ".format(time=TIME))
    # execute_func(load_data_to_hbase.insert_message_to_hbase, message_src, "insert message! ")

    # 导入group_msg
    group_msg_src = sqlcontext.sql(
        "select * from new_type.group_message where time ='{TIME}'".format(
            time=TIME))
    execute_func(load_data_to_hbase.insert_groupmsg_to_hbase, group_msg_src,
                 "insert groupmsg !")

    # 导入count 信息


if __name__ == '__main__':
    # Script entry point: set up Spark, register the `nvl` SQL helper, then
    # run the load.
    #
    # Python 2 only: sys.setdefaultencoding is removed from the sys module at
    # interpreter start-up, so the module is reloaded to get it back before
    # switching the process-wide default encoding to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    conf = SparkConf()
    sc = SparkContext(conf=conf)
    try:
        sqlcontext = HiveContext(sc)
        # NOTE(review): `shb` is not used in this block; it is kept because
        # it becomes a module-level name that other code may read -- confirm.
        shb = SparkHbaseConn(sc, sqlcontext)
        # nvl(x, y): return x unless it is None (SQL NULL), otherwise y.
        sqlcontext.registerFunction(
            "nvl", lambda x, y: x if x is not None else y)
        execute_load_data()
    finally:
        # Always release the Spark context, even when the load fails.
        sc.stop()