Example #1
def main():
    """
    任务调度
    5分钟采集一次数据

    """
    time.sleep(100)
    global total_flow
    total_flow = {}
    global file_record
    file_record = {}
    operatedb = OperateDB("./mysql.conf")
    operatedb.connect()
    pool = redis.ConnectionPool(host='localhost', port=6381, db=0)
    redis_data = redis.Redis(connection_pool=pool)
    keys = redis_data.keys()
    if keys is not None:
        for key in keys:
            file_record[key] = redis_data.get(key)
    while True:
        time.sleep(10)
        now = datetime.datetime.now()
        #    now_hour = str(now.strftime('%Y%m%d%H'))
        #    now_day = str(now.strftime('%Y%m%d'))
        rootdir = "../cpu_publish_image_api/"
        print rootdir
        filelist = get_file_list(rootdir)
        filelist = sorted(filelist)
        print filelist
        if len(filelist) == 0:
            continue
        else:
            for filename in filelist:
                total_flow = handle_file(filename)
            if len(total_flow.keys()) >= 2:
                datelist = sorted(total_flow.keys())
                for i in range(0, len(datelist) - 1):
                    sql_cmd = "replace into Image_Flow_Funnel (time, Stage, ImageNum) values ('%s', '%s', %s)" \
                        %(datelist[0], '100', total_flow[datelist[i]])
                    logging.info("api接受到的流量%s" % (sql_cmd))
                    operatedb.executesql(sql_cmd)
                    del total_flow[datelist[i]]
            else:
                continue
            for filename in file_record.keys():
                redis_data.set(filename, file_record[filename])
            continue
    operatedb.disconnect()
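
The example above builds its SQL by %-formatting values straight into the statement. A minimal sketch of the same replace-into write using driver-side parameters instead, assuming a pymysql/MySQLdb-style connection (OperateDB's internals are not shown, and save_stage_counts is a hypothetical helper):

import pymysql

def save_stage_counts(conn, total_flow, stage='100'):
    """Write one Image_Flow_Funnel row per finished time bucket, keeping the newest bucket."""
    cursor = conn.cursor()
    for ts in sorted(total_flow.keys())[:-1]:
        cursor.execute(
            "replace into Image_Flow_Funnel (time, Stage, ImageNum) values (%s, %s, %s)",
            (ts, stage, total_flow.pop(ts)))  # the driver escapes the values
    conn.commit()

# usage (credentials would come from the same mysql.conf the examples read):
# conn = pymysql.connect(host='localhost', user='monitor', passwd='...', db='monitor')
# save_stage_counts(conn, {'2017-01-01 10:05:00': 42, '2017-01-01 10:10:00': 7})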
Example #2
def main():
    """
    main
    """
    now = datetime.now()
    now = str(now.strftime('%Y-%m-%d %H:00:00'))
    operatedb = OperateDB("./mysql.conf")
    operatedb.connect()
    content_type = 'news'
    results = get_news_avgonline_time(now)
    timecost = calucate(results)
    update_content(now, timecost, operatedb, content_type)
    content_type = 'image'
    results = get_image_avgonline_time(now)
    timecost = calucate(results)
    update_content(now, timecost, operatedb, content_type)
    content_type = 'video'
    results = get_video_avgonline_time(now)
    timecost = calucate_int(results)
    update_content(now, timecost, operatedb, content_type)
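
The three blocks above differ only in the content type and the pair of helpers they call. A short table-driven sketch of the same flow, reusing the helpers the example already references (nothing new is assumed beyond their existing call signatures):

def collect_avgonline(now, operatedb):
    # (content_type, fetch the hour's results, reduce results to a time cost)
    jobs = [
        ('news',  get_news_avgonline_time,  calucate),
        ('image', get_image_avgonline_time, calucate),
        ('video', get_video_avgonline_time, calucate_int),
    ]
    for content_type, fetch, calc in jobs:
        timecost = calc(fetch(now))
        update_content(now, timecost, operatedb, content_type)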
Example #3
def run():
    """
    get work flow
    """
    now = datetime.datetime.now()
    today = now.strftime('%Y-%m-%d')
    now_time = today + "%"
    now = today + " 00:00:00"
    operatedb = OperateDB("./mysql.conf") 
    operatedb.connect()
    configure = ConfigParser.ConfigParser()
    configure.read('reason.conf')
    figures = configure.options('reason')
    for figure in figures:
        sql_cmd = "select sum(count) as count from work_flow_news_%s where time like \'%s\'"%(figure, now_time)
        cursor = operatedb.selectsql(sql_cmd)
        result = cursor.fetchall()
        print result
        if result[0][0] is not None:
            reason = configure.get('reason', figure)
            sql_cmd = "replace into work_flow_news (time, reason, count) values (\'%s\', \'%s\', %s)"%(now, reason, int(result[0][0]))
            print sql_cmd
            operatedb.executesql(sql_cmd)
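
run() above builds table names work_flow_news_<option> from the option names in the [reason] section of reason.conf and stores the option values as the reason text. The real file is not shown; a hypothetical sketch of the shape it assumes, exercised through the same ConfigParser calls:

import ConfigParser
import io

# hypothetical reason.conf contents; the real option names and values are not shown
sample = u"""
[reason]
duplicate = duplicate news
lowquality = low quality news
"""

configure = ConfigParser.ConfigParser()
configure.readfp(io.StringIO(sample))
for figure in configure.options('reason'):
    print("work_flow_news_%s -> %s" % (figure, configure.get('reason', figure)))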
Example #4
def main():
    """
    任务调度
    5分钟采集一次数据

    """
    global flow_funnel
    operatedb = OperateDB("./mysql.conf")
    operatedb.connect()
    pool = redis.ConnectionPool(host='localhost', port=6380, db=0)
    redis_data = redis.Redis(connection_pool=pool)
    keys = redis_data.keys()
    for key in keys:
        file_record[key] = redis_data.get(key)
    while True:
        rootdir = "../cpu_publish_image_api/"
        #now = datetime.datetime.now()
        #begin_time = now.replace(hour = 00, minute = 15)
        #end_time = now.replace(hour = 00, minute = 45)
        #if now >= begin_time and now <= end_time:
        #    cmd = 'sh -x ./clear_api.sh'
        #    os.system(cmd)
        #    sys.exit()
        #else:
        filelist = get_file_list(rootdir)
        filelist = sorted(filelist)
        if len(filelist) == 0:
            continue
        else:
            for filename in filelist:
                flow_funnel = handle_file(filename)
            if len(flow_funnel.keys()) != 0:
                now = datetime.datetime.now()
                now = now.strftime('%Y-%m-%d %H:%M:%S')
                now = now.split(' ')[0] + "%"
                sql = "select time from Image_Flow_Funnel where Stage = 200 and time like \'%s\' order by time desc" % (
                    now)
                cursor = operatedb.selectsql(sql)
                timelist = []
                for row in cursor:
                    row = ''.join(row)
                    timelist.append(row)
                datelist = sorted(flow_funnel.keys())
                for i in range(0, len(datelist) - 1):
                    if datelist[i] in timelist:
                        sql = "update Image_Flow_Funnel\
                                set ImageNum = ImageNum + %s where Stage = 200 and time = \'%s\'" % (
                            flow_funnel[datelist[i]], datelist[i])
                        logging.info(
                            "save to db success image number as %s%s" %
                            (flow_funnel[datelist[i]], sql))
                    else:
                        sql = "insert into Image_Flow_Funnel\
                                (time, Stage, ImageNum) values (\'%s\', \'%s\', %s)"\
                                %(datelist[i], '200', flow_funnel[datelist[i]])
                        logging.info(
                            "save to db success image number as %s%s" %
                            (flow_funnel[datelist[i]], sql))
                    operatedb.executesql(sql)
                    flow_funnel.pop(datelist[i])
            else:
                continue
            for filename in file_record.keys():
                redis_data.set(filename, file_record[filename])
    operatedb.disconnect()
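
Example #4 first selects the existing Stage 200 rows for today and then branches between update and insert. If Image_Flow_Funnel has a unique key over (time, Stage), the same accumulate-or-create behaviour fits in one MySQL statement; a sketch under that assumption, using the executesql wrapper the examples already call:

import logging

def accumulate_stage(operatedb, when, stage, image_num):
    # assumes UNIQUE KEY (time, Stage); without it the ON DUPLICATE clause never fires
    sql = ("insert into Image_Flow_Funnel (time, Stage, ImageNum) "
           "values ('%s', '%s', %s) "
           "on duplicate key update ImageNum = ImageNum + %s"
           % (when, stage, image_num, image_num))
    logging.info("upsert flow funnel %s" % (sql))
    operatedb.executesql(sql)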
Example #5
def main():
    """
    实时采集数据

    """
    logging.basicConfig(level=logging.INFO,
                format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                datefmt='%a, %d %b %Y %H:%M:%S',
                filename='savetodb.log',
                filemode='w')
    operatedb = OperateDB("./mysql.conf")
    operatedb.connect()
    pool = redis.ConnectionPool(host='localhost', port=6381, db=0)
    redis_data = redis.Redis(connection_pool=pool)
    keys = redis_data.keys()
    for key in keys:
        file_record[key] = redis_data.get(key)
    while True:
        rootdir = "../cpu_publish_image_mq/"
        filelist = get_file_list(rootdir)
        filelist = sorted(filelist)
        if len(filelist) == 0:
            continue
        else:
            for filename in filelist:
                save_to_db = handle_file(filename)
            """
            策略过滤掉的图片数量以及对应的原因
            """
            """
            入库的图片数量以及对应的类目信息
            """
            now = datetime.datetime.now()
            begin_time = now.replace(hour = 00, minute = 15)
            end_time = now.replace(hour = 00, minute = 45)
            if now >= begin_time and now <= end_time:
                cmd = "sh -x ./clear_savedb.sh"
                os.system(cmd)
                sys.exit()
            else:
                now = now.strftime('%Y-%m-%d') + "%"
                sql = "select time from Image_Flow_Funnel where Stage = 400 and time like \'%s\' order by time desc"%(now)
                cursor = operatedb.selectsql(sql)
                timelist = []
                for row in cursor:
                    row = ''.join(row)
                    timelist.append(row)
                if len(save_to_db.keys()) != 0:
                    datelist = sorted(save_to_db.keys())
                    for i in range(0, len(datelist)):
                        if datelist[i] in timelist:
                            sql = "update Image_Flow_Funnel\
                                    set ImageNum = ImageNum + %s where\
                                    time = \'%s\' and Stage = 400"%(save_to_db[datelist[i]], datelist[i])
                            logging.info("save to db success image number as %s%s"%(save_to_db[datelist[i]], sql))
                        else:
                            sql = "insert into Image_Flow_Funnel\
                                    (time, Stage, ImageNum) values (\'%s\', \'400\', %s)"\
                                    %(datelist[i], save_to_db[datelist[i]])
                            logging.info("save to db success image number as %s%s"%(save_to_db[datelist[i]], sql))
                        operatedb.executesql(sql)
                        save_to_db.pop(datelist[i])
                else:
                    continue
                for filename in file_record.keys():
                    redis_data.set(filename, file_record[filename])
    operatedb.disconnect()
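
Examples #1, #4 and #5 all keep a file_record dict in step with Redis so that a restart does not recount files it has already processed; handle_file, which actually maintains it, is not shown. A minimal sketch of that checkpoint idea with redis-py, assuming the stored value is a byte offset (the examples never say what file_record values contain):

import redis

file_record = {}

def restore_offsets(redis_data):
    # reload checkpoints written by a previous run
    for key in redis_data.keys():
        file_record[key] = int(redis_data.get(key) or 0)

def process_file(redis_data, filename):
    offset = file_record.get(filename, 0)
    with open(filename) as fp:
        fp.seek(offset)          # resume after the part already counted
        for line in fp:
            pass                 # parse the line and update in-memory counters here
        file_record[filename] = fp.tell()
    redis_data.set(filename, file_record[filename])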
Example #6
def main():
    """
    实时采集数据

    """
    operatedb = OperateDB("./mysql.conf")
    operatedb.connect()
    pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
    redis_data = redis.Redis(connection_pool=pool)
    keys = redis_data.keys()
    for key in keys:
        file_record[key] = redis_data.get(key)
    while True:
        time.sleep(10)
        now = datetime.datetime.now()
        #  now_hour = str(now.strftime('%Y%m%d%H'))
        #  now_day = str(now.strftime('%Y%m%d'))
        rootdir = "../cpu_publish_image_mq/"
        filelist = get_file_list(rootdir)
        filelist = sorted(filelist)
        if len(filelist) == 0:
            continue
        else:
            for filename in filelist:
                result = handle_file(filename)  # parse the file once instead of three times
                work_flow = result[0]
                save_to_db = result[1]
                dequeue = result[2]
            """
            策略过滤掉的图片数量以及对应的原因
            """
            if len(work_flow.keys()) >= 2:
                datelist = sorted(work_flow.keys())
                for i in range(0, len(datelist) - 1):
                    for name in work_flow[datelist[i]].keys():
                        sql_cmd = "replace into work_flow_image\
                              (time, reason, count) values (\'%s\', \'%s\', %s)"\
                              %(datelist[i], name, work_flow[datelist[i]][name])
                        logging.info("插入过滤掉的图集数量及其对应的原因 %s" % (sql_cmd))

                        operatedb.executesql(sql_cmd)
                    work_flow.pop(datelist[i])
            """
            入库的图片数量以及对应的类目信息
            """
            if len(save_to_db.keys()) >= 2:
                datelist = sorted(save_to_db.keys())
                total_num = 0
                for i in range(0, len(datelist) - 1):
                    for categoryId in save_to_db[datelist[i]].keys():
                        sql_cmd = "replace into Image_Category_Info\
                                (time, CategoryId, ImageNum) values (\'%s\', \'%s\', %s)"\
                                %(datelist[i], categoryId, save_to_db[datelist[i]][categoryId])
                        total_num += save_to_db[datelist[i]][categoryId]
                        logging.info("插入入库的图集数量及其类目:%s %s" %
                                     (categoryId, sql_cmd))
                        operatedb.executesql(sql_cmd)
                        sql =  "replace into Image_Flow_Funnel\
                                (time, Stage, ImageNum) values (\'%s\', \'%s\', %s)"\
                                %(datelist[0], '400', total_num)
                        logging.info("插入入库的图集数量:%s %s" % (total_num, sql))
                        operatedb.executesql(sql)
                    save_to_db.pop(datelist[i])
            """
            出队列的图片数量
            """
            if len(dequeue.keys()) >= 2:
                datelist = sorted(dequeue.keys())
                for i in range(0, len(datelist) - 1):
                    sql_cmd = "replace into Image_Flow_Funnel\
                            (time, Stage, ImageNum) values (\'%s\', \'%s\', %s)"\
                            %(datelist[i], '300', dequeue[datelist[i]])
                    logging.info("插入出队列的图集数量%s %s" %
                                 (dequeue[datelist[i]], sql_cmd))
                    operatedb.executesql(sql_cmd)
                    dequeue.pop(datelist[i])
            else:
                continue
            for filename in file_record.keys():
                redis_data.set(filename, file_record[filename])
            continue
    operatedb.disconnect()
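
handle_file itself is not included in any of these examples. From how Example #6 indexes its return value, it appears to yield three dicts keyed by a timestamp string; a hypothetical stub with just that shape, useful when reading the loops above:

def handle_file(filename):
    # hypothetical return shape only; the real function parses a log file
    work_flow = {}    # time -> {reason: count}      images filtered out, per reason
    save_to_db = {}   # time -> {categoryId: count}  images stored, per category
    dequeue = {}      # time -> count                images taken off the queue
    # ... read `filename` and accumulate into the three dicts ...
    return work_flow, save_to_db, dequeue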
Example #7
 def run(self):
     """
     run the main process
     """
     logging.basicConfig(level=logging.INFO,
             format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
             datefmt='%a, %d %b %Y %H:%M:%S',
             filename='videoworkflow_%s.log'%(self.table),
             filemode='w')
     operatedb = OperateDB("./mysql.conf")
     pool = redis.ConnectionPool(host='localhost', port=6387, db=0)
     redis_data = redis.Redis(connection_pool=pool)
     self.file_record[self.reason] = redis_data.hgetall(self.reason)
     while True:
         rootdir = "../cpu_publish_news_mq/"
         filelist = self.get_file_list(rootdir)
         filelist = sorted(filelist)
         if len(filelist) == 0:
             continue
         else:
             for filename in filelist:
                 logging.info("filename is %s"%(filename))
                 self.work_flow = self.handle_file(filename)
                 now = datetime.datetime.now()
                 now = now.strftime('%Y-%m-%d') + "%"
                 if len(self.work_flow.keys()) != 0:
                     sql = "select reason, time from work_flow_video where time like \'%s\' order by time desc"%(now)
                     logging.info("sql is %s"%(sql))
                     try:
                         operatedb.connect()
                         cursor = operatedb.selectsql(sql)
                         results = cursor.fetchall()
                     except BaseException, e:
                         logging.warning("get reason, time result failed %s"%(e))
                         continue
                     timelist = []
                     if len(results) == 0:
                         datelist = sorted(self.work_flow.keys())
                         for i in range(0, len(datelist)):
                             sql = "insert into work_flow_video\
                                     (time, count, reason) values (\'%s\', \'%s\', \'%s\')"\
                                     %(datelist[i], self.work_flow[datelist[i]], self.reason)
                             logging.info("sql is %s"%(sql))
                             operatedb.executesql(sql)
                             self.work_flow.pop(datelist[i])
                     else:
                         for result in results:
                             timelist.append(result[1])
                         datelist = sorted(self.work_flow.keys())
                         for i in range(0, len(datelist)):
                             if (datelist[i] in timelist and result[0] == self.reason):
                                 sql = "update work_flow_video\
                                         set count = count + %s where\
                                         time = \'%s\' and reason = \'%s\'"%(self.work_flow[datelist[i]], datelist[i], self.reason)
                                 logging.info("sql is %s"%(sql))
                             else:
                                 sql = "insert into work_flow_video\
                                         (time, count, reason) values (\'%s\', \'%s\', \'%s\')"\
                                         %(datelist[i], self.work_flow[datelist[i]], self.reason)
                                 logging.info("sql is %s"%(sql))
                             operatedb.executesql(sql)
                             logging.info("save to db success video number as %s"%(self.work_flow[datelist[i]]))
                             self.work_flow.pop(datelist[i])
                 else:
                     continue
                 redis_data.hset(self.reason, filename, self.file_record[self.reason][filename])
                 operatedb.disconnect()
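
Example #7 switches from plain Redis keys to one hash per reason (hgetall on start, hset after each file). The same bookkeeping in isolation, assuming the hash maps processed filenames to a progress marker (the reason name and filename below are made up):

import redis

def load_record(redis_data, reason):
    # one hash per reason: field = filename, value = progress marker
    return redis_data.hgetall(reason)

def save_record(redis_data, reason, filename, marker):
    redis_data.hset(reason, filename, marker)

pool = redis.ConnectionPool(host='localhost', port=6387, db=0)
redis_data = redis.Redis(connection_pool=pool)
record = load_record(redis_data, 'deleted')                # {filename: marker, ...}
save_record(redis_data, 'deleted', 'news_mq.log.0101', 1)  # mark one file as handled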