Beispiel #1
0
# coding: utf-8
import time

from config import *
from hbquery import *
from method import get_update_ymd, get_date, isdataready, add_partition

now_time = time.time()
now_date = time.strftime('%Y%m%d', time.localtime(now_time))

date = get_update_ymd(MysqlTable.BASIC_INDEX_EVERYDAY, date_type='everyday')
date2 = get_update_ymd(MysqlTable.BASIC_INDEX_EVERYDAY_PRO,
                       date_type='everyday')
if date == date2:
    start_date = get_date(date, 1) if date else START_YMD
    end_date = get_date(now_date, -1)
    if int(start_date) <= int(end_date):
        dates = get_days(start_date, end_date)
        dates.sort(reverse=True)
        for date in dates:
            if isdataready(date):
                end_date = date
                add_partition(MysqlTable.BASIC_INDEX_EVERYDAY_PRO, end_date)
                add_partition(MysqlTable.BASIC_INDEX_EVERYDAY, end_date)
                active_devices(start_date,
                               end_date,
                               date_type='everyday',
                               insert=True)
                timevalid_active_devices(start_date,
                                         end_date,
                                         date_type='everyday',
Beispiel #2
0
# coding: utf-8
import time

from config import *
from hbquery import *
from method import get_update_ymd, get_date, isdataready, add_partition

now_time = time.time()
now_date = time.strftime('%Y%m%d', time.localtime(now_time))

date = get_update_ymd(MysqlTable.BASIC_INDEX_EVERYDAY, source='finance')
date2 = get_update_ymd(MysqlTable.BASIC_INDEX_EVERYDAY_PRO, source='finance')
if date == date2:
    start_date = get_date(date, 1) if date else FINANCE_START_YMD
    end_date = get_date(now_date, -1)
    if int(start_date) <= int(end_date):
        dates = get_days(start_date, end_date)
        dates.sort(reverse=True)
        for date in dates:
            if isdataready(date, source='finance'):
                end_date = date
                add_partition(MysqlTable.BASIC_INDEX_EVERYDAY_PRO, end_date)
                add_partition(MysqlTable.BASIC_INDEX_EVERYDAY, end_date)
                play_times('finance',
                           start_date,
                           end_date,
                           date_type='everyday',
                           insert=True)
                clicks('finance',
                       start_date,
                       end_date,
# coding: utf-8
import time

from config import *
from hbquery import *
from method import get_update_ymdh, get_date, isdataready, add_partition

# Today's date in local time, formatted as YYYYMMDD.
now_time = time.time()
now_date = time.strftime('%Y%m%d', time.localtime(now_time))

# Last update point recorded for each of the two hourly index tables.
date = get_update_ymdh(MysqlTable.BASIC_INDEX_HOUR, date_type='hour')
date2 = get_update_ymdh(MysqlTable.BASIC_INDEX_HOUR_PRO, date_type='hour')

# Nothing is done unless both tables report the same update point.
if date == date2:
    # Continue from the recorded point, or fall back to the configured start
    # value when nothing has been recorded yet.
    if date:
        start_date = get_date(date, 1)
    else:
        start_date = START_YMDH
    # Upper bound is hour '23' of the day returned by get_date(now_date, -1)
    # (presumably yesterday -- confirm against get_date).
    end_date = get_date(now_date, -1) + '23'
    if int(start_date) <= int(end_date):
        # Readiness is checked per day, so walk the days newest first; the
        # get_hours(start_date, end_date) alternative is not used here.
        dates = sorted(get_days(start_date[:8], end_date[:8]), reverse=True)
        for date in dates:
            if isdataready(date):
                # Cover the whole ready day, up to and including hour 23.
                end_date = date + '23'
                # Partitions are added to both tables before any insert.
                add_partition(MysqlTable.BASIC_INDEX_HOUR_PRO, end_date)
                add_partition(MysqlTable.BASIC_INDEX_HOUR, end_date)
                play_times(start_date,
                           end_date,
                           date_type='hour',
                           insert=True)
                clicks(start_date,
                       end_date,
                       date_type='hour',
                       insert=True)
                active_devices(start_date, end_date, date_type='hour', insert=True)
                watch_time(start_date,
                           end_date,
                           date_type='hour',
                           insert=True)
Beispiel #4
0
    def init_sql(self,
                 index,
                 table=HbaseMysqlConfig.HBASE_TABLE,
                 date=None,
                 device=None,
                 device_in=None,
                 program=None,
                 tag=None,
                 area=None,
                 filtering=None,
                 group_by=None,
                 having=None,
                 order_by=None,
                 order='asc',
                 limit=None):
        """
        Build a SELECT statement and store it in ``self.sql``.

        Every falsy argument is simply left out of the statement. The WHERE
        conditions are joined with ``and``; the trailing clauses are emitted
        in the order group by, having, order by, limit.

        NOTE: all values are interpolated into the SQL text as-is, without
        escaping or parameter binding. Only pass trusted values.

        :param index: selected expressions (the part after ``select``), as a
                      list, e.g. ['ymd', 'count(*)']
        :param table: table to query; defaults to the HBase table from config
        :param date: time range as a list.
                     Two entries mean "between both points, inclusive",
                     e.g. [date1, date2]; a single entry means "up to and
                     including that point", e.g. [date1]
        :param device: restrict to a single device_id (user id)
        :param device_in: text placed inside ``device_id in (...)`` to match
                          several users
        :param program: restrict to a single content_id (program id)
        :param tag: restrict to a tagid (column id); 'tagall' means no
                    restriction. A tag with entries in TAGS also matches those
                    sub-tags
        :param area: restrict to a province; '000000' means no restriction
        :param filtering: extra raw condition appended to the WHERE clause
        :param group_by: group-by columns, as a list, e.g. ['ymd']
        :param having: raw having expression
        :param order_by: order-by columns, as a list
        :param order: sort direction appended after the order-by columns,
                      'asc' by default
        :param limit: row limit
        """
        # Expressions that follow "select".
        index_str = ','.join(index)

        # Time range condition.
        hourly = self.date_type == 'hour'
        if not date:
            date_str = ''
        elif hourly:
            # Hour values carry the day in their first 8 characters. Besides
            # the exact ymdh bounds, a strict ymd bound shifted outwards with
            # get_date is added (presumably a coarse day-level pre-filter --
            # confirm against the table layout).
            if len(date) == 1:
                date_str = 'ymd<{date} and ymdh<={dateh}'.format(
                    date=get_date(date[0][:8], 1), dateh=date[0])
            else:
                date_str = ('ymd>{start_date} and ymd<{end_date} and '
                            'ymdh>={start_dateh} and ymdh<={end_dateh}').format(
                                start_date=get_date(date[0][:8], -1),
                                end_date=get_date(date[1][:8], 1),
                                start_dateh=date[0],
                                end_dateh=date[1])
        elif len(date) == 1:
            date_str = 'ymd<={date}'.format(date=date[0])
        else:
            date_str = 'ymd>={start_date} and ymd<={end_date}'.format(
                start_date=date[0], end_date=date[1])

        # Conditions on device_id and content_id.
        device_str = "device_id='{device_id}'".format(
            device_id=device) if device else ''
        devicein_str = 'device_id in ({device_id})'.format(
            device_id=device_in) if device_in else ''
        program_str = "content_id='{content_id}'".format(
            content_id=program) if program else ''

        # Condition on tagid: a tag with sub-tags in TAGS matches itself or
        # any of them, wrapped in parentheses so the "or" stays contained.
        if tag and tag != 'tagall':
            tag_str = "Lower(tagid)='{tagid}'".format(tagid=tag)
            if tag in TAGS and TAGS[tag]:
                alternatives = [tag_str]
                for subtag in TAGS[tag]:
                    alternatives.append(
                        "Lower(tagid)='{tagid}'".format(tagid=subtag))
                tag_str = '(' + ' or '.join(alternatives) + ')'
        else:
            tag_str = ''

        # Condition on province.
        area_str = "province='{province}'".format(
            province=area) if area and area != '000000' else ''

        # Caller-supplied raw filter.
        filter_str = filtering if filtering else ''

        # group by / having / order by / limit clauses.
        group_by_str = 'group by ' + ','.join(group_by) if group_by else ''
        having_str = 'having ' + having if having else ''
        order_by_str = 'order by ' + ','.join(
            order_by) + ' ' + order if order_by else ''
        limit_str = "limit " + str(limit) if limit else ''

        # Assemble the final statement. Only the " from {table}" literal is
        # formatted, so braces inside the selected expressions are untouched.
        sql = "select " + index_str + " from {table}".format(table=table)
        conditions = [
            clause for clause in (date_str, device_str, devicein_str,
                                  program_str, tag_str, area_str, filter_str)
            if clause
        ]
        # Empty trailing clauses are dropped too; otherwise each one would
        # still contribute a separator and pad the statement with spaces.
        trailing = [
            clause
            for clause in (group_by_str, having_str, order_by_str, limit_str)
            if clause
        ]
        if conditions:
            sql += " where " + ' and '.join(conditions)
        if trailing:
            sql += ' ' + ' '.join(trailing)
        self.sql = sql