コード例 #1
0
def create_conversion_result_table():
    """Create the conversion result table in Hive unless it already exists.

    The table name is read from ``configure.CResult_table``. The table has the
    columns ``process`` (STRING), ``counts`` (INT) and ``countu`` (INT), is
    partitioned by ``dt`` (STRING) and uses a tab as the field delimiter.

    :return: None
    """
    column_defs = ','.join(('process STRING', 'counts INT', 'countu INT'))
    # NOTE: the clauses are joined without separating whitespace, so the
    # statement reads "...INT)PARTITIONED BY (dt STRING)ROW FORMAT ...".
    ddl_template = ''.join((
        'CREATE TABLE IF NOT EXISTS %s (',
        column_defs,
        ')',
        'PARTITIONED BY (dt STRING)',
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'",
    ))
    statement = ddl_template % configure.CResult_table
    hive.execute_hql(statement)
コード例 #2
0
def create_conversion_middle_result_table():
    """Create the conversion middle-result table in Hive unless it exists.

    The table name is read from ``configure.CMiddleResult_table``. The table
    has the STRING columns ``session``, ``uuid`` and ``process``, is
    partitioned by ``dt`` (STRING) and uses a tab as the field delimiter.

    :return: None
    """
    column_defs = ', '.join((
        'session STRING',
        'uuid STRING',
        'process STRING',
    ))
    ddl_template = (
        'CREATE TABLE IF NOT EXISTS %s (' + column_defs + ') '
        + 'PARTITIONED BY (dt STRING) '
        + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'"
    )
    statement = ddl_template % configure.CMiddleResult_table
    hive.execute_hql(statement)
コード例 #3
0
def load_conversion_result(dt):
    """Load conversion output into Hive and aggregate it per process.

    Runs two HQL statements in order:

    1. ``load data inpath`` from ``configure.ConversionOutput`` into the
       ``dt`` partition of the middle-result table.
    2. ``insert into`` the ``dt`` partition of the result table, one row per
       ``process`` with ``count(process)`` and ``count(distinct(uuid))``
       computed from the middle-result rows of that same partition.

    The table names are taken from ``configure.CMiddleResult_table`` and
    ``configure.CResult_table``, the same settings used by
    ``create_conversion_middle_result_table`` and
    ``create_conversion_result_table``, instead of being hard-coded, so the
    load always targets the tables those functions create.

    :param dt: partition value; interpolated into the HQL text as-is, so it
        must come from a trusted source.
    :return: None
    """
    middle_table = configure.CMiddleResult_table
    result_table = configure.CResult_table

    load_hql = (
        'load data inpath \'%s\' '
        'into table %s partition(dt=\'%s\')'
        % (configure.ConversionOutput, middle_table, dt)
    )
    hive.execute_hql(load_hql)

    # Aggregate only after the load so the partition being read is populated.
    aggregate_hql = (
        'insert into table %s partition(dt=\'%s\') '
        'select process, count(process), count(distinct(uuid)) '
        'from %s '
        'where dt=\'%s\' group by process'
        % (result_table, dt, middle_table, dt)
    )
    hive.execute_hql(aggregate_hql)
コード例 #4
0
def create_conversion_table():
    """Create the conversion input table in Hive unless it already exists.

    The table name is read from ``configure.CInput_table``. The table has the
    string columns ``url``, ``uuid``, ``session_id`` and ``csvp``, is
    partitioned by ``dt`` (string) and uses a tab as the field delimiter.

    :return: None
    """
    # TODO: table content
    column_defs = ', '.join((
        'url string',
        'uuid string',
        'session_id string',
        'csvp string',
    ))
    # NOTE: there is no whitespace between the partition clause and
    # "row format", so the statement reads "...(dt string)row format ...".
    ddl_template = (
        'create table if not EXISTS %s (' + column_defs
        + ') partitioned by (dt string)'
        + "row format delimited fields terminated by '\t'"
    )
    statement = ddl_template % configure.CInput_table
    hive.execute_hql(statement)
コード例 #5
0
def extract_data(old, new, dt):
    """Copy click-stream rows for a date range into one conversion partition.

    Selects ``url``, ``uuid``, ``session_id`` and ``csvp`` from
    ``configure.CSLog_table`` for every source row whose ``dt`` lies between
    ``old`` and ``new`` (both inclusive) and inserts them into partition
    ``dt`` of ``configure.CInput_table``.

    :param old: lower bound (inclusive) of the source ``dt`` range.
    :param new: upper bound (inclusive) of the source ``dt`` range.
    :param dt: target partition of the conversion input table.
    :return: None
    """
    # The third selected column feeds the target's session_id column.
    # Selecting uuid twice here would store the uuid as the session id.
    hql = (
        'insert into table %s partition (dt=\'%s\') '
        'select url,uuid,session_id,csvp '
        'from %s '
        'where dt >= \'%s\' and dt <= \'%s\''
        % (configure.CInput_table, dt, configure.CSLog_table, old, new)
    )

    hive.execute_hql(hql)
コード例 #6
0
def create_click_stream_log_table():
    """Create the click-stream log table in Hive unless it already exists.

    The table name is read from ``configure.CSLog_table``. Every column is a
    string; the table is partitioned by ``dt`` (string) and uses a tab as the
    field delimiter.

    :return: None
    """
    spaced_columns = ', '.join((
        'ip_address string',
        'uuid string',
        'url string',
        'session_id string',
        'session_times string',
        'area_address string',
        'local_address string',
        'browser_type string',
        'os string',
        'refer_url string',
        'receive_time string',
        'user_id string',
    ))
    # The last column follows its comma directly, with no space after it.
    column_defs = spaced_columns + ',' + 'csvp string'
    ddl_template = (
        'create table if not EXISTS %s (' + column_defs + ') '
        + 'partitioned by (dt string) '
        + "row format delimited fields terminated by '\t'"
    )
    statement = ddl_template % configure.CSLog_table
    hive.execute_hql(hql=statement)
コード例 #7
0
def load_click_stream_log(dt):
    """Load click-stream output into one partition of the log table.

    Issues a ``load data inpath`` statement that reads from
    ``configure.ClickStreamOutput`` and targets partition ``dt`` of
    ``configure.CSLog_table``.

    :param dt: partition value; interpolated into the HQL text as-is.
    :return: None
    """
    template = "load data inpath '%s' into table %s partition(dt='%s')"
    params = (configure.ClickStreamOutput, configure.CSLog_table, dt)
    statement = template % params
    hive.execute_hql(statement)