def mergeLog2(domain, ts):
    """Concatenate a domain's per-provider log files into one merged file.

    For every provider returned by ``getDomainProvider(domain)`` the file at
    ``getMergeDestPath(cdn, ts, username, domain)`` is appended, if it exists,
    to a temporary file at ``getMergeTmpPath2(ts, username, domain)``.

    * If at least one provider file existed, the temporary file is moved to
      ``getMergePath2(ts, username, domain)`` and that final path is pushed
      (``lpush``) onto the key read from config option
      ``("redis", "ThreeLogKey")``.
    * If no provider file existed, the (empty) temporary file is removed.
    * If the merge phase raises, the error is logged at debug level and
      ``mergeLog`` is enqueued to run five minutes later.

    Args:
        domain: Domain whose logs are merged.
        ts: Timestamp handed unchanged to the path helpers.

    Returns:
        None.
    """
    merge = False  # set only once the temporary file is complete
    zero = False   # becomes True as soon as one provider file is found
    try:
        username = util.getUsernameByDomain(domain)
        # New-style path: write to a temporary location first; it is moved to
        # its final place in the ``finally`` block only after a clean merge.
        f = getMergeTmpPath2(ts, username, domain)
        outnewpath = os.path.dirname(f)
        if not os.path.exists(outnewpath):
            os.makedirs(outnewpath)
        logger.debug("mergeLog Path: %s" % f)
        with open(f, "w") as xie:
            for cdn in getDomainProvider(domain):
                logger.debug(cdn)
                # Old-style path: where this provider's log was stored.
                dpath = getMergeDestPath(cdn, ts, username, domain)
                logger.debug(dpath)
                if os.path.exists(dpath):
                    zero = True
                    with open(dpath) as g:
                        shutil.copyfileobj(g, xie)
        if zero:
            merge = True
        else:
            logger.debug("mergeLog Path: %s don't have logs." % f)
            os.remove(f)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt still stop the worker instead of being logged and
        # turned into a retry.
        logger.debug("mergeLog Error: %s" % str(sys.exc_info()))
        # NOTE(review): the retry enqueues ``mergeLog``, not ``mergeLog2``, and
        # has no retry cap -- confirm both are intended.
        mscheduler.enqueue_in(timedelta(minutes=5), mergeLog, domain, ts)
    finally:
        # Publishing is kept apart from the merge so that a failure here is
        # only logged and never triggers the retry above.
        try:
            if merge:
                username = util.getUsernameByDomain(domain)
                f = getMergePath2(ts, username, domain)
                outnewpath = os.path.dirname(f)
                if not os.path.exists(outnewpath):
                    os.makedirs(outnewpath)
                shutil.move(getMergeTmpPath2(ts, username, domain), f)
                rlog.lpush(config.get("redis", "ThreeLogKey"), f)
        except Exception:
            logger.debug("mergeLog Error: %s" % str(sys.exc_info()))
def mergeLogmd(domain, ts):
    """Merge the "dianxin" log of a domain into one gzip file and mark it ready.

    The file at ``getMergedianxin(cdn, ts, username, domain)`` is copied, if
    it exists, into a gzip file at ``getMergeTmpPath(ts, username, domain)``.
    A source whose path ends in ``"gz"`` is decompressed while being read;
    any other source is read as raw bytes.

    * If a source file existed, the temporary file is moved to
      ``getMergePath(ts, username, domain)`` and the member
      ``"<username>_<date>_<hour>_<domain>"`` is added (``sadd``) to the key
      ``"ACCESSFILE_READY_LIST_" + username``.
    * If no source file existed, the temporary file is removed.
    * If the merge phase raises, the error is logged at debug level and
      ``mergeLog`` is enqueued to run five minutes later.

    Args:
        domain: Domain whose logs are merged.
        ts: Timestamp handed to the path helpers and to
            ``tsToDate`` / ``tsToHour``.

    Returns:
        None.
    """
    merge = False  # set only once the temporary file is complete
    zero = False   # becomes True as soon as one source file is found
    try:
        username = util.getUsernameByDomain(domain)
        f = getMergeTmpPath(ts, username, domain)
        outnewpath = os.path.dirname(f)
        if not os.path.exists(outnewpath):
            os.makedirs(outnewpath)
        logger.debug("mergeLog Path: %s" % f)
        # ``with`` guarantees the gzip stream is closed (and its trailer
        # written) even when copying one of the sources fails.
        with gzip.GzipFile(filename=f, mode="w") as gz:
            # Only the "dianxin" provider is merged here; the provider list is
            # deliberately not taken from getDomainProvider(domain).
            for cdn in ["dianxin"]:
                logger.debug(cdn)
                dpath = getMergedianxin(cdn, ts, username, domain)
                logger.debug(dpath)
                if os.path.exists(dpath):
                    zero = True
                    # Read bytes in both cases: a gzip stream accepts only
                    # bytes, so a text-mode source would fail on Python 3.
                    opener = gzip.open if dpath.endswith("gz") else open
                    with opener(dpath, "rb") as g:
                        shutil.copyfileobj(g, gz)
        if zero:
            merge = True
        else:
            logger.debug("mergeLog Path: %s don't have logs." % f)
            os.remove(f)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt still stop the worker instead of being logged and
        # turned into a retry.
        logger.debug("mergeLog Error: %s" % str(sys.exc_info()))
        # NOTE(review): the retry enqueues ``mergeLog``, not ``mergeLogmd``,
        # and has no retry cap -- confirm both are intended.
        mscheduler.enqueue_in(timedelta(minutes=5), mergeLog, domain, ts)
    finally:
        # Publishing is kept apart from the merge so that a failure here is
        # only logged and never triggers the retry above.
        try:
            if merge:
                username = util.getUsernameByDomain(domain)
                f = getMergePath(ts, username, domain)
                outnewpath = os.path.dirname(f)
                if not os.path.exists(outnewpath):
                    os.makedirs(outnewpath)
                shutil.move(getMergeTmpPath(ts, username, domain), f)
                r.sadd("ACCESSFILE_READY_LIST_" + username,
                       "_".join([username, tsToDate(ts), tsToHour(ts), domain]))
        except Exception:
            logger.debug("mergeLog Error: %s" % str(sys.exc_info()))
def chinanetLogHandler(url, tyh, tm, domain):
    """Download one ChinaNet log file for the given date/time slot.

    ``tyh`` and ``tm`` are joined with ``"_"`` and parsed with the format
    ``"%Y-%m-%d_%H_%M"``; the result is converted to an integer timestamp
    with ``time.mktime`` and used to build the destination path through
    ``getDestGzPath("ChinaNet", ...)``.

    Args:
        url: Download URL; when falsy nothing is downloaded.
        tyh: Date part, ``YYYY-MM-DD``.
        tm: Time part, ``HH_MM``.
        domain: Domain the log belongs to.

    Returns:
        None in every case; the download result is not propagated.

    Raises:
        ValueError: If ``tyh``/``tm`` do not match the expected format.
    """
    slot = "_".join((tyh, tm))
    ts = int(time.mktime(time.strptime(slot, "%Y-%m-%d_%H_%M")))
    # The username lookup happens before the URL check, so it runs even when
    # there is nothing to download.
    username = util.getUsernameByDomain(domain)
    if not url:
        return
    destination = getDestGzPath("ChinaNet", ts, username, domain)
    wget().download(url, destination)
def downloadLog(cdn, urls, ts, domain, retry_times=0):
    """Download the first log URL for a domain and register the file.

    The destination path is chosen as follows:

    * ``getDestGzPathCc`` when ``domain`` is a member of the
      ``"fiveminutedomain"`` set,
    * ``getDestPath`` when ``cdn`` contains ``"jingdong"``,
    * ``getDestGzPath`` otherwise.

    When there is no usable URL, ``generateEmptyFile(cdn, ts, domain)`` is
    called instead of downloading. A failed download re-enqueues this
    function five minutes later with ``retry_times + 1``. After a successful
    download of a non-empty file, the ``insertaccesslogtodb`` endpoint is
    called with the domain, the file path and the window ``ts`` to
    ``ts + 3600``.

    Args:
        cdn: Provider name.
        urls: Indexable collection of URLs; only ``urls[0]`` is used.
        ts: Start timestamp (integer seconds) of the log window.
        domain: Domain the log belongs to.
        retry_times: Number of attempts already made. Nothing is done once
            it reaches config option ``("retry", "log")``.

    Returns:
        None.
    """
    if retry_times >= config.getint("retry", "log"):
        # Retry budget exhausted: leave a trace so the missing log can be
        # diagnosed instead of disappearing silently.
        logger.warning("downloadLog giving up after %s retries: cdn=%s domain=%s ts=%s",
                       retry_times, cdn, domain, ts)
        return

    # An empty or missing URL list means "no log for this slot".
    try:
        url = urls[0]
    except (IndexError, KeyError, TypeError):
        url = ''

    username = util.getUsernameByDomain(domain)
    if rCcvideo.sismember("fiveminutedomain", domain):
        f = getDestGzPathCc(cdn, ts, username, domain)
    elif 'jingdong' in cdn:
        f = getDestPath(cdn, ts, username, domain)
    else:
        f = getDestGzPath(cdn, ts, username, domain)

    if not url:
        generateEmptyFile(cdn, ts, domain)
        return

    logger.info(url)
    try:
        result = wget().download(url, f)
    except Exception:
        result = False
        logger.error("wget {} error".format(url), exc_info=True)

    if not result:
        scheduler.enqueue_in(timedelta(minutes=5), downloadLog,
                             cdn, urls, ts, domain, retry_times + 1)
        return

    # TODO: the signature below is a hard-coded placeholder value.
    endtime = ts + 3600
    get_url = ('http://openapi.elmeast.com/insertaccesslogtodb?domain=' + domain
               + '&accesslogname=' + f
               + '&starttime=' + str(ts)
               + '&endtime=' + str(endtime)
               + '&signature=fasdfaf')
    if get_FileSize(f) != 0:
        # NOTE(review): no timeout is passed, so an unresponsive endpoint can
        # block this job indefinitely; the query values are also not
        # URL-escaped.
        requests.get(get_url)