def processLocs():
    """Run the all-locations search over every input file.

    Builds a BigData processor wired to the module-level ``ka`` object and
    streams every file found under ``input_dir`` through it.

    Returns:
        The module-level ``ka`` object, populated by the processing run.
    """
    # print() call form works on both Python 2 and 3 (original used the
    # Python 2 print statement, a syntax error under Python 3).
    print('Starting all locations search:')
    logo = logger('AllLocs')
    # Report progress every 10000 lines.
    bd = BigData(logo, status_line_count=10000)
    # BigData calls back into the object hung on .obj while processing.
    bd.obj = ka
    bd.processFiles(BigData.GetInputFiles(input_dir), None)
    return ka
def start(params):
    """Crawl each data file, extract data for the configured city, and
    write the city's timeline to ``params['timeline_path']``.

    Args:
        params: dict with keys ``'out_file_path'``, ``'input_dir_path'``,
            ``'timeline_path'`` and ``'logger'``.
    """
    bd = BigData(params)
    city = City(CITY_NAME, bd, params['out_file_path'])
    input_files = bd.GetInputFiles(params['input_dir_path'])
    # Generate the term-document frequency data for the city.
    city.generateTDF(input_files)
    # Extract nouns from the TDF.
    city.getNounsTDF()
    # BUG FIX: the original rebound `params` to a new dict here, losing
    # 'timeline_path' and raising KeyError on the final line. Use a
    # separate dict for the timeline BigData configuration instead.
    timeline_params = {
        'input_dir_path': '',
        'input_file_path': city.filep,
        'out_file_path': None,
        'logger': params['logger'],
    }
    # Load another BigData object for generating the timeline.
    bd = BigData(timeline_params)
    city.getTimeLine(bd)
    # Write the timeline to file.
    city.writeTimelineToFile(params['timeline_path'])
def getTimeLine(self, bd=None):
    """Build the timeline for every entry in ``self.d``.

    Args:
        bd: optional BigData processor to reuse. The caller in this file
            invokes ``city.getTimeLine(bd)`` with an argument, which the
            original zero-argument signature rejected; accepting an
            optional ``bd`` fixes that while staying backward compatible.
            When omitted, a fresh ``BigData(self.logger)`` is created.
    """
    if bd is None:
        bd = BigData(self.logger)
    # Delegate timeline construction to each stored item.
    for key in self.d:
        self.d[key].getTimeLine(bd)
def generateTDF(self, input_files):
    """Generate the term-document frequency data by streaming all
    input files through a BigData processor."""
    self.bd = BigData(self.logger)
    # BigData calls back into the object hung on .obj while processing.
    self.bd.obj = self
    # NOTE(review): these two lines create new attributes named
    # `bdCheckCondition` / `bdDoSomething` on *self*; given the
    # `self.bd.obj = self` line above, they look like typos for
    # `self.bd.CheckCondition = ...` / `self.bd.DoSomething = ...` —
    # confirm against BigData's callback contract before changing.
    self.bdCheckCondition = self.CheckCondition
    self.bdDoSomething = self.DoSomething
    self.bd.processFiles(input_files,None)