def writeNRows(redisConnection,pipe,usePiped,pipeSize,reader, num, timeMultiplier,contThreshold,logger,headers,key,submitId,includeList,measure):
    """Read up to `num` rows from `reader` and write them as time-continuous blocks.

    Consecutive rows belong to the same block while the difference between
    their timestamps is at most `contThreshold`.  Each completed block is
    handed to `writeCoverage` (timestamps only) and `writeData` (timestamps
    plus the collected values).

    Parameters:
        redisConnection, pipe, usePiped, pipeSize, logger:
            passed through unchanged to `writeCoverage` / `writeData`.
        reader: row iterator; each row is indexable, column 0 holds the key
            and column 1 the timestamp.
        num: maximum number of rows to consume from `reader`.
        timeMultiplier: when greater than 1 the timestamp is scaled by it and
            truncated to an int; otherwise the timestamp is used as a float.
        contThreshold: largest timestamp delta still treated as continuous.
        headers: per-column dicts with "label" and "use" entries.
        includeList: labels whose values are collected into the data vector.
        key, submitId, measure: accepted for interface compatibility only.
            NOTE(review): the block key is always taken from column 0, so
            these three arguments have no effect here - confirm that this is
            intended.

    Raises:
        StopIteration: when `reader` runs out before `num` rows were read.
            The pending block is written before the exception propagates, so
            callers that use it as an end-of-file signal do not lose data.

    Side effects:
        Increments the module-level `blockcounter` and `blockDatacounter`
        once per row read.
    """
    global blockcounter
    global blockDatacounter

    noVal = -1.0

    def flushBlock(times, values, blockKey):
        # An empty block produces no coverage entry and no data.
        if times:
            writeCoverage(times,pipe,usePiped,pipeSize,redisConnection,blockKey,logger)
            writeData(times,values,pipe,usePiped,pipeSize,redisConnection,blockKey,logger)

    # None (rather than 0) marks "no previous row" so that a genuine
    # timestamp of 0 still takes part in the continuity check.
    previous = None
    thisKey = None
    theseTimes = []
    vector = []

    for _ in range(num):
        try:
            row = next(reader)
        except StopIteration:
            # Do not drop what was collected so far, but keep signalling
            # exhaustion to the caller exactly as before.
            flushBlock(theseTimes, vector, thisKey)
            raise

        # NOTE(review): the original layout was ambiguous; both counters are
        # assumed to advance once per row - confirm.
        blockcounter = blockcounter + 1
        blockDatacounter = blockDatacounter + 1

        rowKey = int(row[0])
        if timeMultiplier > 1:
            thisTime = int(float(row[1]) * int(timeMultiplier))
        else:
            # CSV cells are strings; convert so the delta below is numeric.
            thisTime = float(row[1])

        # Close the running block BEFORE adding this row, so the row that
        # opens a new block is stored with that new block.
        if previous is not None and (thisTime - previous) > contThreshold:
            flushBlock(theseTimes, vector, thisKey)
            theseTimes = []
            vector = []

        thisKey = rowKey
        theseTimes.append(thisTime)
        # zip() stops at the shorter sequence, so a row with more cells than
        # there are headers no longer raises IndexError.
        for header, cell in zip(headers, row):
            thisVal = conversions.toNum(cell, noVal)
            if thisVal != noVal and header["use"] and header["label"] in includeList:
                vector.append(thisVal)

        previous = thisTime

    flushBlock(theseTimes, vector, thisKey)
def _parseHeaderRow(row, excludeList, excludePattern, includeList, passThroughs):
    """Return one {"label", "use"} dict per column of the CSV header row."""
    headers = []
    for label in row:
        # An empty label has no first character to test against excludePattern.
        excluded = (label in excludeList) or (label != "" and label[0] in excludePattern)
        wanted = (label in includeList) or (label in passThroughs)
        headers.append({"label": str(label), "use": (not excluded) and wanted})
    return headers


def _writeIndexEntry(redisConnection, pipe, usePiped, indexKey, startTime, endTime, redisLifetime):
    """Write one coverage-index entry for the interval startTime..endTime."""
    payload = '{"endTime":%.8f}' % (endTime)
    if usePiped:
        # The entry is only queued here; the caller executes the pipe.
        writePipedRedis(pipe, str(indexKey)+"-Ind", startTime, payload, redisLifetime)
    else:
        writeRedis(redisConnection, str(indexKey)+"-Ind", startTime, payload, redisLifetime)


def loadFile(path, key, measure,redisConnection,usePiped, pipe, config, logger, startRow=0):
    """Load a CSV file into Redis, together with a coverage index of its time intervals.

    The first row of the file is always interpreted as the header row.  Every
    following row is packed into a vector of doubles (one slot per entry of
    config["includeList"], pre-filled with the no-value marker) and written
    via `writeRedis` / `writePipedRedis` using the row's key and measure.
    Rows whose timestamps are at most config["continuous"] *
    config["multiplier"] apart form one interval; each interval is written as
    a '<key>-Ind' entry holding its end time.

    Parameters:
        path: path of the CSV file (comma separated).
        key: label of the column holding the record key.  When
            config["submitId"] is not "0", that id is used as the key instead.
        measure: label of the column whose value is passed as the third
            argument of the write helpers.
        redisConnection: used for direct writes when `usePiped` is false.
        usePiped: when true, writes are queued on `pipe` and executed every
            config["pipeSize"] rows and once at the end.
        pipe: pipeline object providing `execute()`.
        config: mapping providing "noVal", "excludeList", "includeList",
            "passThroughList", "excludePattern", "redisLifetime",
            "submitRate", "submitRecords", "submitId", "pipeSize",
            "continuous", "multiplier" and "timestamp".
        logger: object providing `info()`.
        startRow: `startRow - 1` data rows are skipped before loading starts.

    Rows that carry no value in the measure column are counted but not
    written, because there is no measure to store them under.
    """
    logger.info("Loading file: "+path)
    logger.info("Starting at row: "+str(startRow))
    # Log what the code below actually does: it branches on truthiness.
    logger.info("Use Pipe: "+str(bool(usePiped)))

    noVal = float(config["noVal"])
    excludeList = config["excludeList"]
    includeList = config["includeList"]
    passThroughs = config["passThroughList"]
    excludePattern = config["excludePattern"]
    redisLifetime = int(config["redisLifetime"])
    submitRate = int(config["submitRate"])
    submitRecords = int(config["submitRecords"])
    submitId = config["submitId"]
    pipeSize = int(config["pipeSize"])
    timeOffset = config["continuous"] * config["multiplier"]
    timestampLabel = config["timestamp"]

    counter = 0
    skipped = 0
    indexCounter = 0
    thisKey = 0
    # None marks "no interval open yet"; a timestamp of 0 is a valid value.
    startTime = None
    lastTime = None

    # Binary mode is what the Python 2 csv module expects; the context
    # manager guarantees the file is closed even if a write fails.
    with open(path, "rb") as ifile:
        reader = csv.reader(ifile,delimiter=',')

        # On the first row: build the column descriptions.  An empty file
        # simply yields no columns and no data rows.
        headers = _parseHeaderRow(next(reader, None) or [], excludeList,
                                  excludePattern, includeList, passThroughs)

        # Skip rows, if needed (stops quietly if the file is shorter).
        for _ in range(1, startRow):
            if next(reader, None) is None:
                break

        # Now process...
        for row in reader:
            counter = counter + 1
            vector = [noVal]*len(includeList)
            thisKey = 0
            thisMeasure = None
            rowTime = None

            # zip() stops at the shorter sequence, so a row with more cells
            # than the header no longer raises IndexError.
            for header, cell in zip(headers, row):
                # Pass the same float marker that the comparison uses.
                # NOTE(review): assumes toNum returns its second argument
                # when the cell cannot be converted - confirm.
                thisVal = conversions.toNum(cell, noVal)
                if thisVal == noVal or not header["use"]:
                    continue
                label = header["label"]
                if label == key:
                    thisKey = thisVal if submitId == "0" else submitId
                elif label == measure:
                    thisMeasure = thisVal
                if label in includeList:
                    vector[includeList.index(label)] = thisVal
                # NOTE(review): the timestamp is only honoured for columns
                # flagged "use"; the original layout was ambiguous - confirm.
                if label == timestampLabel:
                    rowTime = thisVal

            # Maintain the coverage index only after the whole row was
            # scanned, so the row's key is known regardless of column order.
            if rowTime is not None:
                if startTime is None:
                    # We are just starting
                    startTime = rowTime
                elif (rowTime - lastTime) > timeOffset:
                    # We are in a new interval: startTime and lastTime define
                    # the finished one.  In piped mode the entry is queued and
                    # executed together with the data below.
                    _writeIndexEntry(redisConnection, pipe, usePiped, thisKey,
                                     startTime, lastTime, redisLifetime)
                    indexCounter = indexCounter + 1
                    startTime = rowTime
                lastTime = rowTime

            if thisMeasure is None:
                # Without a measure there is nothing to file the row under;
                # reusing the previous row's measure would misplace the data.
                skipped = skipped + 1
            else:
                packed = struct.pack('%sd' % len(vector), *vector)
                if usePiped:
                    writePipedRedis(pipe,thisKey, thisMeasure, packed,redisLifetime)
                else:
                    writeRedis(redisConnection,thisKey, thisMeasure, packed,redisLifetime)

            if usePiped:
                if (counter % pipeSize) == 0:
                    logger.info("\t"+str(counter)+ " rows loaded via pipe")
                    # This dumps both queued data and queued index entries.
                    pipe.execute()
            elif (counter % 1000) == 0:
                logger.info("\t"+str(counter)+ " rows loaded")

            if submitRate != 0 and counter == submitRecords:
                break

    logger.info("Done loading data. "+str(counter) +" rows loaded")
    if skipped:
        logger.info("\t"+str(skipped)+" rows skipped: no value in measure column "+str(measure))

    # Close out the coverage index, if an interval was ever opened.
    if startTime is not None:
        _writeIndexEntry(redisConnection, pipe, usePiped, thisKey,
                         startTime, lastTime, redisLifetime)
        indexCounter = indexCounter + 1
    if usePiped:
        # Dump anything left in the pipe.
        pipe.execute()
    logger.info("Done loading coverage index. "+str(indexCounter) +" continuous intervals loaded")