def __init__(self, manager,config):
     '''
     Constructor
     '''
     self.manager=manager
     self.config=config
     self.generator=RecordGenerator(self.config)
 def __init__(self, manager,config):
     '''
     Constructor
     '''
     self.manager=manager
     self.config=config
     self.generator=RecordGenerator(self.config)
Example #3
0
class UniformService(object):
    '''
    classdocs
    '''


    def __init__(self, manager,config):
        '''
        Constructor
        '''
        self.manager=manager
        self.config=config
        self.generator=RecordGenerator(self.config)
        
    def process(self):
        startTime=datetime.datetime.strptime(self.config.find('startindex').text, "%Y-%m-%dT%H:%M:%S")
        endTime=datetime.datetime.strptime(self.config.find('endindex').text, "%Y-%m-%dT%H:%M:%S")
        timeIterator=startTime
            
        while timeIterator < endTime:                 
            self.manager.push(self.generator.generateData(timeIterator))         
            timeIterator += self.getTimeDelta() 
    
    def getTimeDelta(self):
        timeDelta=self.config.find('timedelta').text
        timedeltaunit=self.config.find('timedeltaunit').text
        #microseconds, milliseconds, seconds, minutes, hours, days, weeks
        if timedeltaunit == 'seconds':
            return datetime.timedelta(seconds=int(timeDelta))
        if timedeltaunit == 'milliseconds':
            return datetime.timedelta(milliseconds=int(timeDelta))
        if timedeltaunit == 'minutes':
            return datetime.timedelta(minutes=int(timeDelta))
        if timedeltaunit == 'hours':
            return datetime.timedelta(hours=int(timeDelta))
        if timedeltaunit == 'days':
            return datetime.timedelta(days=int(timeDelta))
        if timedeltaunit == 'weeks':
            return datetime.timedelta(weeks=int(timeDelta)) 
        return datetime.timedelta(seconds=int(timeDelta))
class FixedLoadService(object):
    '''
    This will generate variable timestamp based record based on load patern.
    '''

    def __init__(self, manager,config):
        '''
        Constructor
        '''
        self.manager=manager
        self.config=config
        self.generator=RecordGenerator(self.config)
        
    def process(self):        
                       
        total_record = int(self.config.find('totalrecord').text)                 
        self.generateRecords(total_record)           
        self.manager.flushBatch()                
        
    def generateRecords(self,total_record):
        for i in range(total_record):                        
            self.manager.push(self.generator.generateDataCount(i)) 
class FixedLoadService(object):
    """
    This will generate variable timestamp based record based on load patern.
    """

    def __init__(self, manager, config):
        """
        Constructor
        """
        self.manager = manager
        self.config = config
        self.generator = RecordGenerator(self.config)

    def process(self):

        total_record = int(self.config.find("totalrecord").text)
        self.generateRecords(total_record)
        self.manager.flushBatch()

    def generateRecords(self, total_record):
        for i in range(total_record):
            self.manager.push(self.generator.generateDataCount(i))
class LoadService(object):
    '''
    This will generate variable timestamp based record based on load patern.
    '''

    def __init__(self, manager,config):
        '''
        Constructor
        '''
        self.manager=manager
        self.config=config
        self.generator=RecordGenerator(self.config)
        
    def process(self):
        startTime=datetime.datetime.strptime(self.config.find('startindex').text, "%Y-%m-%dT%H:%M:%S")
                       
        totaltweet = int(self.config.find('timedistribution').find('totalrecord').text)
        patternrepeat = int(self.config.find('timedistribution').find('patternrepeat').text)
        patternrepeatunit = self.config.find('timedistribution').find('patternrepeatunit').text               
        perloadCount=totaltweet/patternrepeat
                
        for i in range(patternrepeat):            
            self.processSinglePattern(startTime+(i*datetime.timedelta(seconds=self.getPatternSeconds(patternrepeatunit))),perloadCount,patternrepeatunit)            
        
        self.manager.flushBatch()
    
    def processSinglePattern(self,startTime,perloadCount,patternrepeatunit):        
        #This gets the distribution in the time interval        
        loadDistribution= self.getLoadDistibutionCount(perloadCount,self.getPaternRange(patternrepeatunit),self.config.find('timedistribution').find('patternloaddistribution'))                
        second_distribution=[]
        for tweet_count in loadDistribution:
            second_distribution.append(self.generateSecondDistribution(tweet_count,self.getPatternUnitSeconds(patternrepeatunit)))        
         
        t=startTime                
        for i,sd in enumerate(second_distribution):
            keys=sorted(sd, key=sd.get)
            for key in keys:
                self.generateRecords(t+datetime.timedelta(seconds=key), sd[key])
            t+= datetime.timedelta(seconds=int(self.getPatternUnitSeconds(patternrepeatunit)))
            print "Insert done for : %s - Records added %s" %(t,loadDistribution[i])
           
        
    def generateRecords(self,timestamp,numbers):
        for i in range(numbers):                        
            self.manager.push(self.generator.generateData(timestamp)) 
        

    def generateSecondDistribution(self,tweet_count,seconds_in_distribution):
        random_delta=int(tweet_count/1000)
        if random_delta == 0:
            random_delta=1
        tweets_left=tweet_count
        d={}
        while tweets_left > 0:
            r=randint(1,random_delta)
            s=randint(0,seconds_in_distribution-1)
            if s in d:
                d[s] += r
            else:
                d[s] = r
            tweets_left-=r        
        return d 
        
    def getLoadDistibutionCount(self,perload_count,pattern_range,load_distribution_config):
        load_distribution=[]
        load_total=0
        for pattern_range in load_distribution_config.findall('range'):
            load_total+=float(pattern_range.get('load'))
        normalize_factor=1/(load_total/100)
        
        for pattern_range in load_distribution_config.findall('range'):
            start_dist=int(pattern_range.get('start'))
            end_dist=int(pattern_range.get('end'))
            load= (normalize_factor * float(pattern_range.get('load')))/100
            distribution_count=perload_count*load
            for i in range(abs(end_dist-start_dist)):
                load_distribution.append(int(distribution_count/(abs(end_dist-start_dist))))
        
        return load_distribution                       
    
    def getPatternSeconds(self,patternrepeatunit):  
        if patternrepeatunit == "day":
            return 86400
        elif patternrepeatunit == "hour":
            return 3600
        elif patternrepeatunit == "week":
            return 604800
        elif patternrepeatunit == "month":
            return 2592000
        else:
            return 1
            
    def getPatternUnitSeconds(self,patternrepeatunit):  
        if patternrepeatunit == "day":
            return 3600
        elif patternrepeatunit == "hour":
            return 60
        elif patternrepeatunit == "week":
            return 86400
        elif patternrepeatunit == "month":
            return 86400
        else:
            return 1   
    
    def getPaternRange(self,patternrepeatunit):
        if patternrepeatunit == "day":
            return 24
        elif patternrepeatunit == "hour":
            return 60
        elif patternrepeatunit == "week":
            return 7
        elif patternrepeatunit == "month":
            return 30
        else:
            return 1