Esempio n. 1
0
class SingleStreamer:
    """
    Feeds a single event stream through a sliding window.

    Events are read line by line from ``dataFile`` and appended to a
    ``Window`` created with length ``wl``.  As soon as the window holds at
    least ``wl`` entries, a ``Tsbatcher`` is built for it and one line of
    output is written for every further event.
    """

    ######################################################################
    # Construction & Initialization
    ######################################################################
    def __init__(self, dataFile, outputFileName, wl, mtp, alpha, sigma,
                 printSummary):
        '''
        Constructor:
        dataFile        file containing event stream
        outputFileName  file in which the output is produced
        wl              window length
        mtp             minimal time span
        alpha           minimal support threshold
        sigma           minimal confidence threshold
        printSummary    print the summaries at the print interval

        Note that the output file is opened (and truncated) right here,
        not lazily when mining starts.
        '''
        # Storing the values
        self.dataFile = dataFile
        # Historical spelling of the attribute name; kept for compatibility.
        self.outputfileName = outputFileName
        self.wl = wl
        self.mtp = mtp
        self.alpha = alpha
        self.sigma = sigma
        self.printSummary = printSummary

        # Setting switches
        self.time = 0  # Current time of the stream
        self.outputFile = open(outputFileName, "w")  # Output file
        # No summary file is opened anywhere in this class; the attribute
        # exists so that closeOutput() has something well-defined to test.
        self.summaryOut = None
        self.window = Window(wl)  # Contents of window

    ######################################################################
    # Mining
    ######################################################################
    def mine(self):
        """
        Read the events one line at a time, slide the window over them and
        produce output for every event once the window is filled.

        The data file and the output file(s) are closed when this method
        returns, also when processing a line raises an exception.
        """
        try:
            with open(self.dataFile) as data:
                start = time.time()
                for line in data:
                    # Advance the stream time to the time of the new event
                    event = Event(line)
                    self.time = event.time
                    self.window.append(event)

                    # Nothing to do until the window is filled
                    if len(self.window) < self.wl:
                        continue

                    # NOTE(review): the batcher is built but its result is
                    # not used at the moment; the per-window episode
                    # statistics (support, asupport, wsupport, confidence)
                    # that were derived from it are currently disabled.
                    # The construction is kept in case it has side effects.
                    tsbatcher = Tsbatcher(self.mtp, self.window)

                    self.output()
                end = time.time()

            logging.critical('Time spent mining: %f seconds', end - start)
        finally:
            # Always release the output file(s), even on failure
            self.closeOutput()

    ######################################################################
    # Getters
    ######################################################################
    def getTime(self):
        """
        Returns the current time for the summary: the stream time itself
        when the window length is 1, otherwise the stream time minus the
        window length.
        """
        if self.wl == 1:
            return self.time
        return self.time - self.wl

    def getStreamTime(self):
        """
        Returns the current time of the stream, i.e. the time of the most
        recently read event.
        """
        return self.time

    def getWindow(self):
        """
        Returns the window.
        """
        return self.window

    ######################################################################
    # Outputting
    ######################################################################
    def output(self):
        """
        Produces output and writes it to the disk.

        Currently only a fixed placeholder line is written.
        """
        # Logging: nice for progress monitoring
        logging.info("%d: %s", self.getStreamTime(), "output")

        # Placeholder record; the intended format was
        #    "%s\t%d\t%d\t%f\n" % (itemset, stream time, start, maxFreq)
        self.outputFile.write("%s\n" % "test itemset")

    def closeOutput(self):
        """
        Closes all the open output files.

        The summary file is only closed when one was actually opened;
        previously this raised AttributeError whenever printSummary was
        set, because no summary file is ever created by this class.
        """
        self.outputFile.close()

        summaryOut = getattr(self, "summaryOut", None)
        if self.printSummary and summaryOut is not None:
            summaryOut.close()
Esempio n. 2
0
class SingleStreamer:
    """
    Runs one event stream through a sliding window.

    Each line of ``dataFile`` is parsed into an ``Event`` and appended to a
    ``Window`` created with length ``wl``.  Once the window holds at least
    ``wl`` entries, every further event triggers the construction of a
    ``Tsbatcher`` for the window and one line of output.
    """

    ######################################################################
    # Construction & Initialization
    ######################################################################
    def __init__(self, dataFile, outputFileName, wl, mtp, alpha, sigma, printSummary):
        '''
        Constructor:
        dataFile        file containing event stream
        outputFileName  file in which the output is produced
        wl              window length
        mtp             minimal time span
        alpha           minimal support threshold
        sigma           minimal confidence threshold
        printSummary    print the summaries at the print interval

        The output file is opened for writing (and truncated) immediately.
        '''
        # Storing the values
        self.dataFile = dataFile
        # Attribute name spelled as in the original code; kept unchanged so
        # existing readers of the attribute keep working.
        self.outputfileName = outputFileName
        self.wl = wl
        self.mtp = mtp
        self.alpha = alpha
        self.sigma = sigma
        self.printSummary = printSummary

        # Setting switches
        self.time = 0                                  # Current time of the stream
        self.outputFile = open(outputFileName, "w")    # Output file
        self.summaryOut = None                         # Never opened by this class
        self.window = Window(wl)                       # Contents of window

    ######################################################################
    # Mining
    ######################################################################
    def mine(self):
        """
        Read the events, feed them into the window and write output for
        each event as soon as the window is filled.

        Both the data file and the output file(s) are closed on exit, even
        if an exception interrupts the processing.
        """
        try:
            # Opening the data file; closed automatically by the with-block
            with open(self.dataFile) as data:
                start = time.time()

                # Processing one event at a time
                for line in data:
                    event = Event(line)
                    self.time = event.time
                    self.window.append(event)

                    if len(self.window) < self.wl:
                        # Window not filled yet: keep reading
                        continue

                    # NOTE(review): the result is unused for now.  The
                    # statistics that were computed from it (support,
                    # asupport, wsupport, confidence per episode) are
                    # disabled; the object is still created in case its
                    # construction has side effects.
                    tsbatcher = Tsbatcher(self.mtp, self.window)

                    # Produce output for the current window
                    self.output()

                end = time.time()

            logging.critical('Time spent mining: %f seconds', end - start)
        finally:
            self.closeOutput()

    ######################################################################
    # Getters
    ######################################################################
    def getTime(self):
        """
        Returns the current time for the summary.  With a window length of
        1 this is the stream time; otherwise it is the stream time minus
        the window length.
        """
        if self.wl == 1:
            return self.time
        return self.time - self.wl

    def getStreamTime(self):
        """
        Returns the current time of the stream, i.e. the time of the last
        event that was read.
        """
        return self.time

    def getWindow(self):
        """
        Returns the window.
        """
        return self.window

    ######################################################################
    # Outputting
    ######################################################################
    def output(self):
        """
        Produces output and writes it to the disk.

        At present a constant placeholder line is written per call.
        """
        # Logging: nice for progress monitoring
        logging.info("%d: %s", self.getStreamTime(), "output")

        # Placeholder; the record format originally planned was
        #    "%s\t%d\t%d\t%f\n" % (itemset, stream time, start, maxFreq)
        self.outputFile.write("%s\n" % "test itemset")

    def closeOutput(self):
        """
        Closes all the open output files.

        A summary file is closed only if one exists: this class never opens
        one, so the unconditional close used to fail with AttributeError
        when printSummary was set.
        """
        self.outputFile.close()

        summaryOut = getattr(self, "summaryOut", None)
        if self.printSummary and summaryOut is not None:
            summaryOut.close()
Esempio n. 3
0
class Streamer(threading.Thread):
    """
    Thread that reads one event stream and publishes its sliding window.

    The thread cooperates with other code through the module-level
    dictionaries ``conds``, ``windows`` and ``nwins``, all keyed by the
    stream name.
    """

    ######################################################################
    # Construction & Initialization
    ######################################################################

    def __init__(self, name, wl, dataFile):
        '''
        Constructor:
        name            name of the stream in multiple data stream
        wl              window length
        dataFile        file containing event stream
        '''
        super(Streamer, self).__init__()

        # Storing the values
        self.name = name
        self.wl = wl
        self.dataFile = dataFile

        # Setting switches
        self.window = Window(wl)  # Contents of window

    ######################################################################
    # Sliding Window
    ######################################################################

    def run(self):
        """
        Read new event data and slide the window.

        While no window is published under this stream's name, one line is
        read and appended to the window.  When the window holds at least
        ``wl`` entries, or the data file is exhausted, the window is stored
        in ``windows[self.name]``, ``nwins[self.name]`` is incremented and
        the stream's condition is notified.  The loop ends after the
        window has been published for an exhausted file.

        The condition lock is released and the data file is closed even if
        an exception is raised while processing a line.
        """
        cond = conds[self.name]
        active = True  # False once the data file is exhausted

        # In a real application there should be a buffer here to store the
        # stream data.
        data = open(self.dataFile)
        try:
            while True:
                # A blocking acquire always succeeds, so the lock is simply
                # held for the duration of the with-block; leaving it via
                # continue/break releases the lock as well.
                with cond:
                    if self.name not in windows:
                        line = data.readline()
                        if not line:
                            active = False
                        else:
                            self.window.append(Event(line))

                        if active and len(self.window) < self.wl:
                            # Still filling the window: read on at once,
                            # without the pause below.
                            continue

                        # NOTE(review): the Window object itself is
                        # published, not a copy, and this thread keeps
                        # appending to it afterwards -- confirm consumers
                        # are fine with that.
                        windows[self.name] = self.window
                        print("\tStreamer " + self.name + ":" + "window is prepared.")
                        nwins[self.name] = nwins[self.name] + 1
                        cond.notify()

                        if not active:
                            break

                # Pause before checking again whether the published window
                # has been taken out of ``windows``.
                time.sleep(1)
        finally:
            data.close()

        print("\tStreamer " + self.name + ":" + "is over.")