Ejemplo n.º 1
0
    def get(self,q_dict,step_minute=None): #query_dict  #step(time): 5 (unit:minute)
        rst = self.rawget(q_dict,step_minute)
        data = rst['Data']
        if not data:
            return rst
        if not q_dict.get('_pageby',''):
            return rst

        time1 = time.time()
        # 如果没有limit_rows参数,并且满足如下任一条件,则直接返回rawdata
        if (not q_dict.get('_limit_rows','')) and (len(q_dict['_pageby'])>1 or len(q_dict['_fields'])>1 or str(q_dict.get('_tstep','')) == '0' or q_dict.has_key('_rawdata')):
            return rst

        # 如果有limit_rows参数,则直接按规则过滤
        elif q_dict.get('_limit_rows',''):
            new_data = self.limit_rows_get(data, q_dict)
            action = 'Filter'

        # 如果没有limit_rows参数,同时又都不满足上面几个条件,则会做数据填充和数据清洗
        else:
            filled = self.datafill(data)
            new_data = self.dataclean(filled,q_dict.get('_dataclean',5))
            action = 'Fill&Clean'

        rst['Data'] = new_data
        elapsed = round(time.time() - time1)
        new_count = len(new_data)
        log('[Data.%s] Elapsed: %s ResultCountChange %s --> %s' %(action, elapsed, rst['ResultCount'], new_count))

        elapsed += float(rst['Elapsed'][:-1])
        rst['Elapsed'] = str(elapsed)+'s'
        rst['ResultCount'] = str(new_count)+' rows'
        return rst
Ejemplo n.º 2
0
 def _loadfile2db_one_by_one(self, file, tablename, fields_terminated):
     db = self.conn(readonly = False, tablename = tablename)
     cur = db.cursor()
     insert = 0
     update = 0
     err = 0
     rec = 0
     f = open(file,'rb')
     while True:
         try: l = f.next()
         except StopIteration: break
         rec += 1
         values = l[:-1].split(fields_terminated)
         try:
             r = self.query_append(cur, tablename, values)
             if r == 'insert':
                 insert += 1
             elif r == 'update':
                 update += 1
         except:
             #print 'append error:',traceback.format_exc()
             log('append error. %s' %traceback.format_exc(), 1) 
             err += 1
         
     f.close()
     cur.close()
     db.close()
     return {'Records': rec,  'Added':insert ,  'Updated':update,  'Errors': err} 
Ejemplo n.º 3
0
def upload_result_file(request):
    """
    上传文件,做入库处理  POST 方法
    参数:
    file=@filename   本地文件
    _o=cdnbw         数据object名字
    _s=201203201340   这份数据的时间戳,请注意该文件中的数据时间跨度不得超过定义的拆表时间区间(table_split)
    _t=,             数据的分隔符,默认空格
    data_exists_action  数据如果存在(主键冲突)的处理方法,支持3种模式:    ignore (Default) | replace | append
                        ignore:    如果发现数据已经存在,则忽略;不存在的数据可以成功导入
                        replace:   如果已存在,则替换掉旧数据
                        append:    如果已存在,则往上累加(除主键外的其他字段,当存在百分比类似的数据时不适合使用append,除非确认可以对这些字段做累加)

    e.g. curl  -F file=@filename -F _o=cdnbw -F _s=201203201340 -F '_t=,' -F data_exists_action=replace http://jdata.domain/api/uploadresultfile

    """
    uploaddir = '/tmp/jdata/'
    if not isdir(uploaddir):mkdir(uploaddir)
    if request.method == 'POST':
        if request.META.has_key('HTTP_X_FORWARDED_FOR'):  
            client =  request.META['HTTP_X_FORWARDED_FOR']  
        else:  
            client = request.META['REMOTE_ADDR'] 
        file = request.FILES['file']
        filename = file.name
        try:obj = request.POST['_o']
        except:raise UploadDataParameterError("parameter `_o` is required")
        DM = DataModel(obj)
        try:timeline = request.POST['_s']
        except:raise UploadDataParameterError("parameter `_s` is required")
        fields_terminated = request.POST.get('_t',' ')

        objdir = join(uploaddir,obj)
        if not isdir(objdir):
            mkdir(objdir)
        now = time.strftime('%Y%m%d%H%M%S',time.localtime())
        tmpfile = join(objdir,now+'_'+timeline+'_'+filename)
        f = open(tmpfile,'wb')
        f.write(file.read())
        f.close()
        data_exists_action = request.POST.get('data_exists_action','ignore')
        t1 = time.time()

        if data_exists_action in('ignore','replace', 'append'):
            tablename = DM.Data.gettablename(timeline)
            if data_exists_action in ('replace', 'append'):
                if not DM.Data.pk:
                    raise ObjectPKNotFound('No PK Found For this Data `%s` ,so you can not use parameter `%s`' %(obj, data_exists_action))
            rst = DM.Data.loadfile2db(tmpfile, tablename, fields_terminated, data_exists_action)
            log('upload: %s %s [%s] %ss %s %s' %(client, obj, data_exists_action, int(time.time()-t1),tmpfile,rst))
            try: 
                remove(tmpfile)
            except:
                log('remove resultfile Failed: %s' %tmpfile)
            return return_http_json(rst)
        else:
            raise UploadDataParameterError(" Unknown value `"+data_exists_action+"` for data_exists_action")
    else:
        raise UploadDataMethodError("""Please POST your data like this: curl  -F file=@filename -F _o=cdnbw -F _s=201203201340 -F '_t= '  http://jdata.domain/api/uploadresultfile""")
Ejemplo n.º 4
0
 def query_master(self, sql):
     if sql.lower().find('select') == 0:
         c = self._conn(self.dmc_master_r)
         log('MR: %s' %sql,2)
     else:
         c = self._conn(self.dmc_master_w)
         log('MW: %s' %sql,2)
     cur = c.cursor()
     cur.execute(sql)
     t = cur.fetchall()
     cur.execute('commit')
     cur.close()
     c.close()
     return t
Ejemplo n.º 5
0
 def queryexecute(self,sql, tablename = ''):
     if sql.lower().find('select') == 0:
         db = self.conn(readonly = True, tablename = tablename)
         log('R: %s' %sql, 1)
     else:
         db = self.conn(readonly = False, tablename = tablename )
         log('W: %s' %sql, 1)
     c=db.cursor()
     rows_examined = 0
     try:
         try:
             c.execute(sql)
         except OperationalError , e:
             if e[0] == 1203:
                 log('1203 Error max_user_connections, retry...', 1)
                 time.sleep(0.01)
                 c.execute(sql)
             elif e[0] == 1050:
                 raise TableAlreadyExists(e[1])
             else:
                 raise e
         except ProgrammingError , e:
             #if sql.lower().find('select') == 0 and e[0] == 1146:
             if e[0] == 1146:
                 raise TableNotExists('Table not exists: '+e[1])
             else:
                 raise e
Ejemplo n.º 6
0
    def query(self,s,iscache=True,refresh=False,tablename = ''):
        if not iscache:
            return self.queryexecute(s, tablename)
        m=md5.new()
        m.update(s)
        ky=m.hexdigest()
        ky = 'jd_'+ky

        if not refresh:
            v = sqlcache.get(ky)
            if v:
                log('CACHE HIT:%s ProcessedRowsMem:%s' %(tablename, v['ProcessedRowsMem']))
                return v
            else:
                info = 'CACHE MISS:'
        else:
            info = 'CACHE REFRESH:'
        try:
            rst = self.queryexecute(s, tablename)
        except TableNotExists,e:
            log('%s %s' %(info, e), 1)
            return {'Data':(),'ProcessedRowsMem':0,'ProcessedRowsDisk':0}
Ejemplo n.º 7
0
def upload_result_file(request):
    """
    上传文件,做入库处理  POST 方法
    参数:
    file=@filename   本地文件
    _o=cdnbw         数据object名字
    _s=201203201340   这份数据的时间戳,请注意该文件中的数据时间跨度不得超过定义的拆表时间区间(table_split)
    _t=,             数据的分隔符,默认空格
    data_exists_action  数据如果存在(主键冲突)的处理方法,支持3种模式:    ignore (Default) | replace | append
                        ignore:    如果发现数据已经存在,则忽略;不存在的数据可以成功导入
                        replace:   如果已存在,则替换掉旧数据
                        append:    如果已存在,则往上累加(除主键外的其他字段,当存在百分比类似的数据时不适合使用append,除非确认可以对这些字段做累加)

    e.g. curl  -F file=@filename -F _o=cdnbw -F _s=201203201340 -F '_t=,' -F data_exists_action=replace http://jdata.domain/api/uploadresultfile

    """
    uploaddir = '/tmp/jdata/'
    if not isdir(uploaddir): mkdir(uploaddir)
    if request.method == 'POST':
        if request.META.has_key('HTTP_X_FORWARDED_FOR'):
            client = request.META['HTTP_X_FORWARDED_FOR']
        else:
            client = request.META['REMOTE_ADDR']
        file = request.FILES['file']
        filename = file.name
        try:
            obj = request.POST['_o']
        except:
            raise UploadDataParameterError("parameter `_o` is required")
        DM = DataModel(obj)
        try:
            timeline = request.POST['_s']
        except:
            raise UploadDataParameterError("parameter `_s` is required")
        fields_terminated = request.POST.get('_t', ' ')

        objdir = join(uploaddir, obj)
        if not isdir(objdir):
            mkdir(objdir)
        now = time.strftime('%Y%m%d%H%M%S', time.localtime())
        tmpfile = join(objdir, now + '_' + timeline + '_' + filename)
        f = open(tmpfile, 'wb')
        f.write(file.read())
        f.close()
        data_exists_action = request.POST.get('data_exists_action', 'ignore')
        t1 = time.time()

        if data_exists_action in ('ignore', 'replace', 'append'):
            tablename = DM.Data.gettablename(timeline)
            if data_exists_action in ('replace', 'append'):
                if not DM.Data.pk:
                    raise ObjectPKNotFound(
                        'No PK Found For this Data `%s` ,so you can not use parameter `%s`'
                        % (obj, data_exists_action))
            rst = DM.Data.loadfile2db(tmpfile, tablename, fields_terminated,
                                      data_exists_action)
            log('upload: %s %s [%s] %ss %s %s' %
                (client, obj, data_exists_action, int(time.time() - t1),
                 tmpfile, rst))
            try:
                remove(tmpfile)
            except:
                log('remove resultfile Failed: %s' % tmpfile)
            return return_http_json(rst)
        else:
            raise UploadDataParameterError(" Unknown value `" +
                                           data_exists_action +
                                           "` for data_exists_action")
    else:
        raise UploadDataMethodError(
            """Please POST your data like this: curl  -F file=@filename -F _o=cdnbw -F _s=201203201340 -F '_t= '  http://jdata.domain/api/uploadresultfile"""
        )
Ejemplo n.º 8
0
    def rawget(self,q_dict,step_minute=None): #query_dict  #step(time): 5 (unit:minute)
        if not step_minute:
            if not q_dict.has_key('_tstep'):
                step_minute=5
            else:
                step_minute=q_dict['_tstep']
        step_sec = 60*int(step_minute)
        time1=time.time()
        #time_field='date_format(date_add(concat('+self.timefield+',"00") ,interval -mod(right('+self.timefield+',2),'+str(step_minute)+') minute),"%Y/%m/%d %H:%i")'
        time_field = 'from_unixtime(floor((unix_timestamp(concat(%s,"00"))+28800)/%s)*%s-28800, "%s")'  %(self.timefield, step_sec, step_sec, "%Y/%m/%d %H:%i")
        # +-28800 主要是为了去除unixtime不是从0点开始(从1970-01-01 08:00:00开始)的影响

        querylist=[]
        display_fields = []
        sql_s = 'select '  #select sql
        if str(step_minute) != '0':
            sql_s=sql_s+time_field+', '
            display_fields.append('时间')
        if q_dict.get('_pageby',''):
            for j in q_dict['_pageby']:
                if j in self.fields_alias.keys():
                    display_fields.append(self.fields_alias[j][1])
                    j = self.fields_alias[j][0]
                else:
                    try:
                        fd_name = [o[2] for o in self.meta_db if o[0] == j][0]
                    except:
                        fd_name = j
                    display_fields.append(fd_name)
                sql_s = sql_s + j + ','
        for i in q_dict['_fields']:
            if i in self.fields_alias.keys():
                display_fields.append(self.fields_alias[i][1])
                i = self.fields_alias[i][0].replace('_minutes',str(step_minute))
                i = i.replace('_tstep', str(step_minute))
            else:
                try:
                    fd_name = [o[2] for o in self.meta_db if o[0] == i][0]
                except:
                    fd_name = i
                display_fields.append(fd_name)
            #check tstep:如果tstep=0,只允许sum,count(加),暂不支持减乘除,avg,max,min等
            if str(step_minute) == '0':
                for x in ('/', '*', '-', 'avg', 'max', 'min'):
                    if i.find(x) >= 0:
                        raise UnsupportedQuery('Field `%s` is not supported with `_tstep=0`' %i)
            sql_s=sql_s+i+','
        sql_s = sql_s[:-1]+' from '
        sql_w = ' where '   #where sql
        if not q_dict.has_key('_s'):
            stime=time.strftime('%Y%m%d0000')
            etime=time.strftime('%Y%m%d%H%M')
        else:
            stime=q_dict['_s']
            if not q_dict.has_key('_e'):
                etime=time.strftime('%Y%m%d%H%M')
            else:
                etime=q_dict['_e']
        try:
            querylist=self.generate_querylist(stime,etime)
        except ValueError:
            raise URLParameterError('Time Format Error  `start`:'+stime+' `end`:'+etime)

        for i in q_dict['_filters']:
            sql_w = sql_w+i+' and '

        if str(step_minute) != '0':
            if q_dict.get('_pageby',''):
                #sql_g = ' group by '+time_field+','+q_dict['_pageby']
                sql_g = ' group by '+time_field
                for j in q_dict['_pageby']:
                    if j in self.fields_alias.keys():
                        j = self.fields_alias[j][0]
                    sql_g = sql_g + ',' + j
            else:
                sql_g = ' group by '+time_field
        else:
            if q_dict.get('_pageby',''):
                #sql_g = ' group by '+q_dict['_pageby']
                sql_g = ' group by '
                for j in q_dict['_pageby']:
                    if j in self.fields_alias.keys():
                        j = self.fields_alias[j][0]
                    sql_g = sql_g + j + ','
                sql_g = sql_g[:-1]
            else:
                sql_g = ''

        mythread = []
        debugout = []
        for i in querylist:
            sql=sql_s+i[0]+sql_w+self.timefield+' >= '+i[1]+' and '+self.timefield+' <= '+i[2]+sql_g    
            args=(sql,step_minute,q_dict.get('_pageby',''),q_dict.has_key('_refresh'), i[0])
            t=Thread(target=self.multithreadquery,args=args)
            mythread.append(t)
            debugout.append( dict(zip(('sql','_tstep','_pageby','_refresh','tablename'), args) ))

        if q_dict.has_key('_debug'):
            raise DebugPrint(debugout)
            
        for i in mythread:
            i.start()
        for i in mythread:
            i.join()

        data= list(self.multithreadresult)
        data.sort()
        rst_cnt = len(data)
        querys = len(querylist)
        elapsed = round(time.time()-time1,3)

        log('[Data.rawget] Elapsed:%s QuerysALL:%s FromMem/Disk/NotFound:%s/%s/%s \
ProcessedRowsDisk:%s ProcessedRowsMem:%s ResultCount:%s %s' %(elapsed, querys, self.querys_frommem, self.querys_fromdisk, self.querys_notfound, self.rowsfromdisk, self.rowsfrommem, rst_cnt, q_dict))
        rst = {'Data':data, 'Metadata':display_fields, 'Elapsed':str(elapsed)+'s', 'ProcessedRowsDisk':str(self.rowsfromdisk)+' rows', 'ProcessedRowsMem':str(self.rowsfrommem)+' rows', 'ResultCount':str(rst_cnt)+' rows'}
        self.multithreadresult=()
        self.rowsfromdisk = 0
        self.rowsfrommem = 0
        self.querys_notfound = 0
        self.querys_frommem = 0
        self.querys_fromdisk = 0
        return rst
Ejemplo n.º 9
0
        if not refresh:
            v = sqlcache.get(ky)
            if v:
                log('CACHE HIT:%s ProcessedRowsMem:%s' %(tablename, v['ProcessedRowsMem']))
                return v
            else:
                info = 'CACHE MISS:'
        else:
            info = 'CACHE REFRESH:'
        try:
            rst = self.queryexecute(s, tablename)
        except TableNotExists,e:
            log('%s %s' %(info, e), 1)
            return {'Data':(),'ProcessedRowsMem':0,'ProcessedRowsDisk':0}
        except TableNotExistsInDMC,e:
            log('%s %s' %(info, e), 1)
            return {'Data':(),'ProcessedRowsMem':0,'ProcessedRowsDisk':0}
        log('%s:%s ProcessedRowsDisk:%s' %(info, tablename, rst['ProcessedRowsDisk']))
        mem_rst = {}
        mem_rst['Data'] = rst['Data']
        mem_rst['ProcessedRowsMem'] = rst['ProcessedRowsDisk']
        mem_rst['ProcessedRowsDisk'] = 0
        sqlcache.set(ky,mem_rst,60*60*24)
        return rst


    def multithreadquery(self,sql,step_minute,pageby,refresh,tablename):
        rst=self.query(sql,refresh=refresh, tablename = tablename)
        t = rst['Data']
        rows_disk = rst['ProcessedRowsDisk']
        rows_mem = rst['ProcessedRowsMem']