class SingleStreamer:
    """Drive a single event stream through a fixed-length sliding window.

    Events are read line by line from ``dataFile`` and appended to a
    ``Window`` of length ``wl``.  For every event read while the window
    holds at least ``wl`` events, a ``Tsbatcher`` is built over the window
    and one record is written to the output file.
    """

    ######################################################################
    # Construction & Initialization
    ######################################################################
    def __init__(self, dataFile, outputFileName, wl, mtp, alpha, sigma,
                 printSummary):
        '''
        Constructor:
            dataFile        file containing event stream
            outputFileName  file in which the output is produced
            wl              window length
            mtp             minimal time span
            alpha           minimal support threshold
            sigma           minimal confidence threshold (the original text
                            said "conference"; presumably a typo)
            printSummary    print the summaries at the print interval

        Note: the output file is opened (and truncated) immediately.
        '''
        # Storing the values
        self.dataFile = dataFile
        # NOTE: attribute spelling (lower-case "f") kept for compatibility.
        self.outputfileName = outputFileName
        self.wl = wl
        self.mtp = mtp
        self.alpha = alpha
        self.sigma = sigma
        self.printSummary = printSummary

        # Setting switches
        self.time = 0                                  # Current time of the stream
        self.outputFile = open(outputFileName, "w")    # Output file
        self.window = Window(wl)                       # Contents of window
        # Nothing in this class opens a summary file; keep the attribute
        # defined so closeOutput() can test it safely.
        self.summaryOut = None

    ######################################################################
    # Mining
    ######################################################################
    def mine(self):
        """
        Read the data file one event per line and slide the window over it.

        Each line is parsed into an ``Event`` whose ``time`` becomes the
        current stream time.  Until the window holds ``wl`` events nothing
        else happens; afterwards a ``Tsbatcher`` is constructed for the
        current window and ``output()`` is called once per event.

        The elapsed wall-clock time is logged when the stream is exhausted.
        The output file is closed when this method returns or raises.
        """
        start = time.time()
        try:
            # 'with' guarantees the data file is closed even on error.
            with open(self.dataFile) as data:
                for line in data:
                    # Setting the time one further
                    event = Event(line)
                    self.time = event.time
                    self.window.append(event)

                    # Wait until the window has filled up.
                    if len(self.window) < self.wl:
                        continue

                    # The batcher's result is not consumed here; the
                    # construction is kept in case it has side effects
                    # -- TODO confirm against Tsbatcher.
                    Tsbatcher(self.mtp, self.window)

                    # Check whether we need to produce output
                    self.output()

            end = time.time()
            logging.critical('Time spent mining: %f seconds', end - start)
        finally:
            # Closing the output file(s), also when mining failed.
            self.closeOutput()

    ######################################################################
    # Getters
    ######################################################################
    def getTime(self):
        """
        Returns the current time (for the summary): the stream time itself
        when the window length is 1, otherwise the stream time minus the
        window length.
        """
        if self.wl == 1:
            return self.time
        return self.time - self.wl

    def getStreamTime(self):
        """
        Returns the current time of the stream, i.e. the time of the most
        recently read event (0 before any event was read).
        """
        return self.time

    def getWindow(self):
        """
        Returns the window.
        """
        return self.window

    ######################################################################
    # Outputting
    ######################################################################
    def output(self):
        """
        Logs the current stream time and writes one record to the output
        file.  The record is currently the fixed placeholder text
        "test itemset".
        """
        # Logging: nice for progress monitoring
        logging.info("%d: %s", self.getStreamTime(), "output")

        # Write!
        self.outputFile.write("%s\n" % (str("test itemset")))

    def closeOutput(self):
        """
        Closes all the open output files.

        The summary file is only closed when summaries were requested and
        a summary file actually exists; this class never opens one itself,
        so an unconditional close would raise AttributeError.
        """
        self.outputFile.close()
        summaryOut = getattr(self, "summaryOut", None)
        if self.printSummary and summaryOut is not None:
            summaryOut.close()
class SingleStreamer:
    """Drive a single event stream through a fixed-length sliding window.

    Events are read line by line from ``dataFile`` and appended to a
    ``Window`` of length ``wl``.  For every event read while the window
    holds at least ``wl`` events, a ``Tsbatcher`` is built over the window
    and one record is written to the output file.
    """

    ######################################################################
    # Construction & Initialization
    ######################################################################
    def __init__(self, dataFile, outputFileName, wl, mtp, alpha, sigma,
                 printSummary):
        '''
        Constructor:
            dataFile        file containing event stream
            outputFileName  file in which the output is produced
            wl              window length
            mtp             minimal time span
            alpha           minimal support threshold
            sigma           minimal confidence threshold (the original text
                            said "conference"; presumably a typo)
            printSummary    print the summaries at the print interval

        Note: the output file is opened (and truncated) immediately.
        '''
        # Storing the values
        self.dataFile = dataFile
        # NOTE: attribute spelling (lower-case "f") kept for compatibility.
        self.outputfileName = outputFileName
        self.wl = wl
        self.mtp = mtp
        self.alpha = alpha
        self.sigma = sigma
        self.printSummary = printSummary

        # Setting switches
        self.time = 0                                  # Current time of the stream
        self.outputFile = open(outputFileName, "w")    # Output file
        self.window = Window(wl)                       # Contents of window
        # Nothing in this class opens a summary file; keep the attribute
        # defined so closeOutput() can test it safely.
        self.summaryOut = None

    ######################################################################
    # Mining
    ######################################################################
    def mine(self):
        """
        Read the data file one event per line and slide the window over it.

        Each line is parsed into an ``Event`` whose ``time`` becomes the
        current stream time.  Until the window holds ``wl`` events nothing
        else happens; afterwards a ``Tsbatcher`` is constructed for the
        current window and ``output()`` is called once per event.

        The elapsed wall-clock time is logged when the stream is exhausted.
        The output file is closed when this method returns or raises.
        """
        start = time.time()
        try:
            # 'with' guarantees the data file is closed even on error.
            with open(self.dataFile) as data:
                for line in data:
                    # Setting the time one further
                    event = Event(line)
                    self.time = event.time
                    self.window.append(event)

                    # Wait until the window has filled up.
                    if len(self.window) < self.wl:
                        continue

                    # The batcher's result is not consumed here; the
                    # construction is kept in case it has side effects
                    # -- TODO confirm against Tsbatcher.
                    Tsbatcher(self.mtp, self.window)

                    # Check whether we need to produce output
                    self.output()

            end = time.time()
            logging.critical('Time spent mining: %f seconds', end - start)
        finally:
            # Closing the output file(s), also when mining failed.
            self.closeOutput()

    ######################################################################
    # Getters
    ######################################################################
    def getTime(self):
        """
        Returns the current time (for the summary): the stream time itself
        when the window length is 1, otherwise the stream time minus the
        window length.
        """
        if self.wl == 1:
            return self.time
        return self.time - self.wl

    def getStreamTime(self):
        """
        Returns the current time of the stream, i.e. the time of the most
        recently read event (0 before any event was read).
        """
        return self.time

    def getWindow(self):
        """
        Returns the window.
        """
        return self.window

    ######################################################################
    # Outputting
    ######################################################################
    def output(self):
        """
        Logs the current stream time and writes one record to the output
        file.  The record is currently the fixed placeholder text
        "test itemset".
        """
        # Logging: nice for progress monitoring
        logging.info("%d: %s", self.getStreamTime(), "output")

        # Write!
        self.outputFile.write("%s\n" % (str("test itemset")))

    def closeOutput(self):
        """
        Closes all the open output files.

        The summary file is only closed when summaries were requested and
        a summary file actually exists; this class never opens one itself,
        so an unconditional close would raise AttributeError.
        """
        self.outputFile.close()
        summaryOut = getattr(self, "summaryOut", None)
        if self.printSummary and summaryOut is not None:
            summaryOut.close()
class Streamer(threading.Thread):
    """Producer thread that slides a window over one named event stream.

    The thread reads events from ``dataFile`` and publishes its window in
    the module-level ``windows`` dict under ``name`` each time the window
    is ready.  Hand-over is coordinated through the condition object
    ``conds[name]``; ``nwins[name]`` counts the published windows.
    The consumer is presumably expected to remove ``windows[name]`` and
    notify the condition -- verify against the miner code.
    """

    ######################################################################
    # Construction & Initialization
    ######################################################################
    def __init__(self, name, wl, dataFile):
        '''
        Constructor:
            name        name of the stream in multiple data stream
            wl          window length
            dataFile    file containing event stream
        '''
        super(Streamer, self).__init__()

        # Storing the values
        self.name = name
        self.wl = wl
        self.dataFile = dataFile

        # Setting switches
        self.window = Window(wl)    # Contents of window

    ######################################################################
    # Sliding Window
    ######################################################################
    def run(self):
        """
        Read new event data and slide the window.

        Loop, holding ``conds[name]`` for each step:
          * if a window for this stream is still published, wait on the
            condition until notified;
          * otherwise read one line; an empty read marks end of stream,
            any other line is parsed into an ``Event`` and appended;
          * while the window is still filling (and the stream is active)
            read on immediately;
          * otherwise publish the window, bump ``nwins[name]``, notify the
            condition, and stop once the stream has ended.

        The one-second pause between steps is taken outside the lock.
        """
        # Shared module-level state; only mutated, never rebound, so no
        # 'global' declaration is required.
        cond = conds[self.name]
        active = True

        # In a real application a buffer would hold the stream data here.
        # 'with' closes the data file even if an event fails to parse.
        with open(self.dataFile) as stream:
            while True:
                # 'with cond' releases the lock on every exit path,
                # including after wait() re-acquires it; the previous
                # manual acquire/release left it held after a wait.
                with cond:
                    if self.name in windows:
                        # Previous window not consumed yet.
                        cond.wait()
                    else:
                        line = stream.readline()
                        if line:
                            self.window.append(Event(line))
                        else:
                            # End of the data file.
                            active = False

                        # Keep filling without pausing until the window
                        # reaches its configured length.
                        if len(self.window) < self.wl and active:
                            continue

                        windows[self.name] = self.window
                        print("\tStreamer " + self.name + ":" + "window is prepared.")
                        nwins[self.name] = nwins[self.name] + 1
                        cond.notify()

                        # The last (possibly partial) window has been
                        # published; nothing more to read.
                        if not active:
                            break
                # NOTE(review): pause placement reconstructed from a
                # whitespace-mangled source -- confirm it belongs at
                # loop level.
                time.sleep(1)

        print("\tStreamer " + self.name + ":" + "is over.")