예제 #1
0
def processLocs():
    """Run the all-locations search over every input file and return ``ka``.

    Creates a BigData around an 'AllLocs' logger, attaches the externally
    defined ``ka`` object to it as ``obj``, and processes the files returned
    by ``BigData.GetInputFiles(input_dir)``.

    Relies on ``ka`` and ``input_dir`` being defined outside this function.
    """
    # Parenthesized single-argument form prints identically on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print('Starting all locations search:')
    logo = logger('AllLocs')
    bd = BigData(logo, status_line_count=10000)
    # presumably BigData calls back into ``obj`` while processing -- confirm
    # against the BigData implementation.
    bd.obj = ka
    bd.processFiles(BigData.GetInputFiles(input_dir), None)
    return ka
예제 #2
0
def start(params):
    """Generate the TDF, nouns and timeline for CITY_NAME and write the timeline.

    Keys read from ``params``: 'out_file_path', 'input_dir_path', 'logger'
    and 'timeline_path'. ``params`` is also passed as-is to the first
    BigData constructor.

    Raises:
        KeyError: if ``params`` lacks one of the keys listed above.
    """
    # crawl each data file and get data for the given location
    # store the data in the output file
    bd = BigData(params)
    city = City(CITY_NAME, bd, params['out_file_path'])
    input_files = bd.GetInputFiles(params['input_dir_path'])
    # Generate the tdf for the city
    city.generateTDF(input_files)
    # get nouns for the city
    city.getNounsTDF()

    # Load another BigData obj for generating the timeline. It gets its own
    # name: rebinding ``params`` here would discard the caller's
    # 'timeline_path', and the final lookup would always raise KeyError.
    timeline_params = {
        'input_dir_path': '',
        'input_file_path': city.filep,
        'out_file_path': None,
        'logger': params['logger'],
    }
    timeline_bd = BigData(timeline_params)
    # get timeline for city
    city.getTimeLine(timeline_bd)
    # write timeline to file
    city.writeTimelineToFile(params['timeline_path'])
예제 #3
0
 def getTimeLine(self):
     """Call getTimeLine on every entry of ``self.d`` with one shared BigData.

     A single BigData is built from ``self.logger`` and passed to each entry.
     """
     timeline_bd = BigData(self.logger)
     for key in self.d:
         member = self.d[key]
         member.getTimeLine(timeline_bd)
예제 #4
0
 def generateTDF(self, input_files):
     """Process ``input_files`` through a fresh BigData bound to this object.

     Stores the new BigData on ``self.bd``, sets its ``obj`` to ``self``,
     and aliases ``self.CheckCondition`` / ``self.DoSomething`` as
     ``self.bdCheckCondition`` / ``self.bdDoSomething`` before processing.
     """
     reader = BigData(self.logger)
     self.bd = reader
     reader.obj = self
     # presumably BigData invokes these bd-prefixed hooks on ``obj`` while
     # processing -- confirm against the BigData implementation.
     self.bdCheckCondition = self.CheckCondition
     self.bdDoSomething = self.DoSomething
     reader.processFiles(input_files, None)