# coding: utf-8 import time from config import * from hbquery import * from method import get_update_ymd, get_date, isdataready, add_partition now_time = time.time() now_date = time.strftime('%Y%m%d', time.localtime(now_time)) date = get_update_ymd(MysqlTable.BASIC_INDEX_EVERYDAY, date_type='everyday') date2 = get_update_ymd(MysqlTable.BASIC_INDEX_EVERYDAY_PRO, date_type='everyday') if date == date2: start_date = get_date(date, 1) if date else START_YMD end_date = get_date(now_date, -1) if int(start_date) <= int(end_date): dates = get_days(start_date, end_date) dates.sort(reverse=True) for date in dates: if isdataready(date): end_date = date add_partition(MysqlTable.BASIC_INDEX_EVERYDAY_PRO, end_date) add_partition(MysqlTable.BASIC_INDEX_EVERYDAY, end_date) active_devices(start_date, end_date, date_type='everyday', insert=True) timevalid_active_devices(start_date, end_date, date_type='everyday',
# coding: utf-8 import time from config import * from hbquery import * from method import get_update_ymd, get_date, isdataready, add_partition now_time = time.time() now_date = time.strftime('%Y%m%d', time.localtime(now_time)) date = get_update_ymd(MysqlTable.BASIC_INDEX_EVERYDAY, source='finance') date2 = get_update_ymd(MysqlTable.BASIC_INDEX_EVERYDAY_PRO, source='finance') if date == date2: start_date = get_date(date, 1) if date else FINANCE_START_YMD end_date = get_date(now_date, -1) if int(start_date) <= int(end_date): dates = get_days(start_date, end_date) dates.sort(reverse=True) for date in dates: if isdataready(date, source='finance'): end_date = date add_partition(MysqlTable.BASIC_INDEX_EVERYDAY_PRO, end_date) add_partition(MysqlTable.BASIC_INDEX_EVERYDAY, end_date) play_times('finance', start_date, end_date, date_type='everyday', insert=True) clicks('finance', start_date, end_date,
# coding: utf-8 import time from config import * from hbquery import * from method import get_update_ymdh, get_date, isdataready, add_partition now_time = time.time() now_date = time.strftime('%Y%m%d', time.localtime(now_time)) date = get_update_ymdh(MysqlTable.BASIC_INDEX_HOUR, date_type='hour') date2 = get_update_ymdh(MysqlTable.BASIC_INDEX_HOUR_PRO, date_type='hour') if date == date2: start_date = get_date(date, 1) if date else START_YMDH end_date = get_date(now_date, -1) + '23' if int(start_date) <= int(end_date): # dates = get_hours(start_date, end_date) dates = get_days(start_date[:8], end_date[:8]) dates.sort(reverse=True) for date in dates: if isdataready(date): end_date = date + '23' add_partition(MysqlTable.BASIC_INDEX_HOUR_PRO, end_date) add_partition(MysqlTable.BASIC_INDEX_HOUR, end_date) play_times(start_date, end_date, date_type='hour', insert=True) clicks(start_date, end_date, date_type='hour', insert=True) active_devices(start_date, end_date, date_type='hour', insert=True) watch_time(start_date, end_date, date_type='hour', insert=True)
def init_sql(self, index, table=HbaseMysqlConfig.HBASE_TABLE, date=None,
             device=None, device_in=None, program=None, tag=None, area=None,
             filtering=None, group_by=None, having=None, order_by=None,
             order='asc', limit=None):
    """
    Build a SQL select statement and store it in ``self.sql``.

    NOTE: every value is interpolated into the statement text as-is, without
    escaping, so callers must only pass trusted values.

    :param index: select list (the part between ``select`` and ``from``),
        given as a list, e.g. ``['ymd', 'count(*)']``
    :param table: table to query; defaults to the HBase table from the config
    :param date: time range, given as a list.
        Two items mean "between the two, inclusive", e.g. ``[date1, date2]``;
        one item means "up to and including it", e.g. ``[date1]``.
        When ``self.date_type == 'hour'`` the items are sliced with ``[:8]``
        to obtain the day part and compared against ``ymdh`` in full.
    :param device: restrict to this device id
    :param device_in: text placed verbatim inside ``device_id in (...)``
    :param program: restrict to this content (program) id
    :param tag: restrict to this tag id; ``'tagall'`` disables the filter.
        If the tag has a non-empty entry in ``TAGS``, those sub-tags are
        OR-ed into the condition as well.
    :param area: restrict to this province; ``'000000'`` disables the filter
    :param filtering: extra raw condition, and-ed with the others
    :param group_by: group-by columns, given as a list, e.g. ``['ymd']``
    :param having: raw having expression
    :param order_by: order-by columns, given as a list
    :param order: sort direction appended after the order-by columns
        (default ``'asc'``)
    :param limit: row limit; falsy values (including 0) add no limit clause
    """
    # Select list.
    index_str = ','.join(index)

    # Time-range condition.
    if self.date_type == 'hour':
        if not date:
            date_str = ''
        elif len(date) == 1:
            # The ymd bound is the exclusive neighbouring day from get_date;
            # the ymdh bound carries the exact inclusive hour limit.
            date_str = 'ymd<{date} and ymdh<={dateh}'.format(
                date=get_date(date[0][:8], 1), dateh=date[0])
        else:
            date_str = ('ymd>{start_date} and ymd<{end_date} '
                        'and ymdh>={start_dateh} and ymdh<={end_dateh}').format(
                start_date=get_date(date[0][:8], -1),
                end_date=get_date(date[1][:8], 1),
                start_dateh=date[0],
                end_dateh=date[1])
    else:
        if not date:
            date_str = ''
        elif len(date) == 1:
            date_str = 'ymd<={date}'.format(date=date[0])
        else:
            date_str = 'ymd>={start_date} and ymd<={end_date}'.format(
                start_date=date[0], end_date=date[1])

    # Device and program conditions.
    device_str = "device_id='{device_id}'".format(
        device_id=device) if device else ''
    devicein_str = 'device_id in ({device_id})'.format(
        device_id=device_in) if device_in else ''
    program_str = "content_id='{content_id}'".format(
        content_id=program) if program else ''

    # Tag condition: the tag itself plus any sub-tags listed in TAGS.
    if tag and tag != 'tagall':
        if tag in TAGS and TAGS[tag]:
            tag_ids = [tag] + list(TAGS[tag])
            tag_str = '(' + ' or '.join(
                "Lower(tagid)='{tagid}'".format(tagid=tag_id)
                for tag_id in tag_ids) + ')'
        else:
            tag_str = "Lower(tagid)='{tagid}'".format(tagid=tag)
    else:
        tag_str = ''

    # Area condition.
    area_str = "province='{province}'".format(
        province=area) if area and area != '000000' else ''

    # Extra raw filter.
    filter_str = filtering if filtering else ''

    # Trailing clauses.
    group_by_str = 'group by ' + ','.join(group_by) if group_by else ''
    having_str = 'having ' + having if having else ''
    order_by_str = 'order by ' + ','.join(
        order_by) + ' ' + order if order_by else ''
    limit_str = "limit " + str(limit) if limit else ''

    # Assemble the statement, dropping every empty piece.
    conditions = [
        clause for clause in (
            date_str, device_str, devicein_str, program_str, tag_str,
            area_str, filter_str
        ) if clause
    ]
    # Empty trailing clauses are filtered out as well; otherwise the list is
    # always non-empty and the statement picks up stray spaces.
    trailing = [
        clause for clause in (
            group_by_str, having_str, order_by_str, limit_str
        ) if clause
    ]

    self.sql = "select " + index_str + " from {table}".format(table=table)
    if conditions:
        self.sql += " where " + ' and '.join(conditions)
    if trailing:
        self.sql += ' ' + ' '.join(trailing)