def getInputDaysStr(sortedDays, nowDay, baseDir, N=7):
    """Build a comma-separated list of the per-day paths that exist under baseDir.

    Candidate days are chosen in one of two ways:

    * ``sortedDays`` non-empty: every day from the day after
      ``sortedDays[-1]`` up to and including ``nowDay``, stepping with
      ``TimeUtil.addDay``.
    * ``sortedDays`` empty: the ``N`` days before today, most recent first,
      formatted as ``YYYY-MM-DD``.

    A candidate path (``baseDir`` + "/" + day) is kept only when it is one of
    the entries returned by ``getFiles(baseDir)``; every missing path is
    reported on stderr and skipped.

    Args:
        sortedDays: sequence of day strings; only the last element is used.
            Presumably sorted ascending -- confirm against callers.
        nowDay: last day to include; compared with ``<=`` against the values
            returned by ``TimeUtil.addDay``.
        baseDir: directory containing one entry per day, with or without a
            trailing "/".
        N: number of past days to try when ``sortedDays`` is empty.

    Returns:
        The existing paths joined with ",", or "" when none exist.
    """
    # Query with the caller's original spelling of baseDir, as before.
    existing = set(getFiles(baseDir))

    # Normalize once for both branches so the fallback branch does not glue
    # the day straight onto the directory name.
    if not baseDir.endswith("/"):
        baseDir = baseDir + "/"

    candidates = []
    if sortedDays:
        day = TimeUtil.addDay(sortedDays[-1], 1)
        while day <= nowDay:
            candidates.append(baseDir + day)
            # Always advance, even when the directory turns out to be
            # missing; otherwise a single gap would loop forever.
            day = TimeUtil.addDay(day, 1)
    else:
        today = date.today()
        for i in range(N):
            past = today - timedelta(days=i + 1)
            candidates.append(baseDir + past.strftime("%Y-%m-%d"))

    found = []
    for newDir in candidates:
        if newDir not in existing:
            sys.stderr.write(newDir + "is not exists!\n")
            continue
        found.append(newDir)

    # Joining only the hits avoids the stray/double commas produced when a
    # separator is appended before the existence check.
    return ",".join(found)
def getInputDaysStr(sortedDays, nowDay, baseDir, N=7):
    """Build a comma-separated list of the per-day paths that exist under baseDir.

    Candidate days are chosen in one of two ways:

    * ``sortedDays`` non-empty: every day from the day after
      ``sortedDays[-1]`` up to and including ``nowDay``, stepping with
      ``TimeUtil.addDay``.
    * ``sortedDays`` empty: the ``N`` days before today, most recent first,
      formatted as ``YYYY-MM-DD``.

    A candidate path (``baseDir`` + "/" + day) is kept only when it is one of
    the entries returned by ``getFiles(baseDir)``; every missing path is
    reported on stderr and skipped.

    Args:
        sortedDays: sequence of day strings; only the last element is used.
            Presumably sorted ascending -- confirm against callers.
        nowDay: last day to include; compared with ``<=`` against the values
            returned by ``TimeUtil.addDay``.
        baseDir: directory containing one entry per day, with or without a
            trailing "/".
        N: number of past days to try when ``sortedDays`` is empty.

    Returns:
        The existing paths joined with ",", or "" when none exist.
    """
    # Query with the caller's original spelling of baseDir, as before.
    existing = set(getFiles(baseDir))

    # Normalize once for both branches so the fallback branch does not glue
    # the day straight onto the directory name.
    if not baseDir.endswith("/"):
        baseDir = baseDir + "/"

    candidates = []
    if sortedDays:
        day = TimeUtil.addDay(sortedDays[-1], 1)
        while day <= nowDay:
            candidates.append(baseDir + day)
            # Always advance, even when the directory turns out to be
            # missing; otherwise a single gap would loop forever.
            day = TimeUtil.addDay(day, 1)
    else:
        today = date.today()
        for i in range(N):
            past = today - timedelta(days=i + 1)
            candidates.append(baseDir + past.strftime("%Y-%m-%d"))

    found = []
    for newDir in candidates:
        if newDir not in existing:
            sys.stderr.write(newDir + "is not exists!\n")
            continue
        found.append(newDir)

    # Joining only the hits avoids the stray/double commas produced when a
    # separator is appended before the existence check.
    return ",".join(found)
def getLastDay(hdfsDir, N=7):
    """Return the day from which processing of hdfsDir should resume.

    Args:
        hdfsDir: directory passed to ``getDays``.
        N: how many days back from today to start when ``getDays`` returns
            an empty sequence.

    Returns:
        ``TimeUtil.addDay(last, 1)`` for the last entry returned by
        ``getDays(hdfsDir)``, or the date ``N`` days before today formatted
        as ``YYYY-MM-DD`` when there are no entries.
    """
    knownDays = getDays(hdfsDir)

    # Nothing recorded yet: fall back to a fixed look-back window.
    if len(knownDays) == 0:
        fallback = date.today() - timedelta(days=N)
        return fallback.strftime("%Y-%m-%d")

    newest = knownDays[-1]
    return TimeUtil.addDay(newest, 1)
def getLastDay(hdfsDir, N=7):
    """Return the day from which processing of hdfsDir should resume.

    Args:
        hdfsDir: directory passed to ``getDays``.
        N: how many days back from today to start when ``getDays`` returns
            an empty sequence.

    Returns:
        ``TimeUtil.addDay(last, 1)`` for the last entry returned by
        ``getDays(hdfsDir)``, or the date ``N`` days before today formatted
        as ``YYYY-MM-DD`` when there are no entries.
    """
    knownDays = getDays(hdfsDir)

    # Nothing recorded yet: fall back to a fixed look-back window.
    if len(knownDays) == 0:
        fallback = date.today() - timedelta(days=N)
        return fallback.strftime("%Y-%m-%d")

    newest = knownDays[-1]
    return TimeUtil.addDay(newest, 1)
def readMall(day, end):
    """Emit one "buy" record per row of mall orders logged between day and end.

    Connects to the ``haodou_mall`` database, selects orders with
    ``OrderStatus=70`` whose ``OrderLog.CreateTime`` lies between
    ``day 00:00:00`` and ``end 23:59:59`` (grouped by user, goods and order),
    and hands every row to ``nutPrint`` as
    ``("uid-<UserId>", "buy", "goods-<GoodsId>", "<unix timestamp>")``.

    Args:
        day: first day of the range, inserted verbatim into the SQL text.
        end: last day of the range, inserted verbatim into the SQL text.
    """
    conn = DB()
    conn.connect("haodou_mall")

    # NOTE(review): day/end are concatenated straight into the statement;
    # the driver below feeds them from the command line -- confirm they are
    # always trusted.
    query = (
        "select og.UserId,og.GoodsId,max(ol.CreateTime),ol.OrderId "
        "from OrderBase as ob,OrderGoods as og, OrderLog as ol "
        "where og.OrderId=ob.OrderId and ob.OrderId=ol.OrderId "
        "and ob.OrderStatus=70 "
        "and ol.CreateTime >='" + day + " 00:00:00' "
        "and ol.CreateTime <= '" + end + " 23:59:59' "
        "group by og.UserId,og.GoodsId,ol.OrderId;"
    )
    rows = conn.execute(query).fetchall()

    for row in rows:
        subject = "uid-" + str(row[0])
        target = "goods-%d" % (row[1])
        # max(CreateTime) is converted to whole seconds since the epoch.
        stamp = str(int(time.mktime(row[2].timetuple())))
        nutPrint(subject, "buy", target, stamp)


if __name__ == "__main__":
    # Usage, by argument shape:
    #   acc <hdfsDir> <N> : resume after the newest day found under hdfsDir
    #                       (or N days back if none) and run up to yesterday
    #   <day> <end>       : run for an explicit range
    #   <N>               : only print N and the range it would stand for
    if len(sys.argv) >= 4 and sys.argv[1] == "acc":
        hdfsDir = sys.argv[2]
        N = int(sys.argv[3])
        sortedDays = hdfsFile.getDays(hdfsDir)
        if len(sortedDays) > 0:
            lastDay = TimeUtil.addDay(sortedDays[-1], 1)
        else:
            lastDay = (date.today() - timedelta(days=N)).strftime("%Y-%m-%d")
        end = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")
        sys.stderr.write("lastDay for dbNut:" + lastDay + "\n")
        sys.stderr.write("end for dbNut:" + end + "\n")
        readComment(lastDay, end)
        readMall(lastDay, end)
    elif len(sys.argv) >= 3:
        readComment(sys.argv[1], sys.argv[2])
        readMall(sys.argv[1], sys.argv[2])
    elif len(sys.argv) >= 2:
        N = int(sys.argv[1])
        end = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")
        day = (date.today() - timedelta(days=N)).strftime("%Y-%m-%d")
        # Single pre-formatted argument: same output as a space-separated
        # print of the three values.
        print("%s %s %s" % (N, day, end))
# Command-line helper: print the day obtained by shifting <day> by <offset>.
# Usage: <script> <day> <offset>
import sys

# TimeUtil lives in the sibling util directory.
sys.path.append("../util")
import TimeUtil

startDay = sys.argv[1]
offset = int(sys.argv[2])
print(TimeUtil.addDay(startDay, offset))
# Command-line helper: print the day obtained by shifting <day> by <offset>.
# Usage: <script> <day> <offset>
import sys

# TimeUtil lives in the sibling util directory.
sys.path.append("../util")
import TimeUtil

startDay = sys.argv[1]
offset = int(sys.argv[2])
print(TimeUtil.addDay(startDay, offset))