def parseProcessLarge(self, size):
    """Split a large source file into pieces and parse them in parallel.

    Validates that ``self.source_file_name`` exists, consumes the first line
    to seed per-run parse state, splits the remaining lines into piece files
    on disk, then hands every piece to a worker thread via
    ``parseFilesThread()``.

    Parameters:
        size: split-size hint forwarded to ``self.largeSpiece`` — TODO
            confirm units (bytes vs. lines) against that helper.
    """
    # Bail out early when the input file is missing.
    if not os.path.exists(self.source_file_name):
        print("Sorry, the %s doesn't exist!,please check" % self.source_file_name)
        return

    # Counter of meaningful source lines consumed so far.
    self.source_lines = 0

    # Lazily read the file; the first line seeds the parse state.
    reader = self.readlinePartial()
    # Strip trailing newline/carriage-return/tab in a single pass.
    # The original chained rstrip('\t').rstrip('\r').rstrip('\n') was
    # order-dependent and left a trailing '\r' behind on CRLF input.
    line = next(reader).rstrip('\n\r\t')
    self.startline = line
    line = line.split(information_split)
    self.start_line_process(line)

    # Pre-compute the positions at which the large file will be split.
    self.large_split_pos = []
    self.largeSpiece(size)

    start = record_start_time("[Large Files Seperate]")
    # The first line was consumed above, so count it before splitting the
    # remainder of the file into piece files.
    self.source_lines += 1
    self.seperateLargeFiles(reader)
    record_finish_time(start, "[Large Files Seperate]")

    # Release the piece-file handles before the workers reopen them.
    for f in self.seperate_file:
        f.close()
    print('--------------Finish Seperate Files-----------------')

    # Parse every generated piece, one worker thread per piece.
    self.parseFilesThread()
def parseFilesThread(self):
    """Spawn one worker thread per separated piece file and wait for all.

    Each ``threadParseWorker`` parses one piece produced by the split step
    (``self.seperate_file_name[i]``) and writes to the matching
    ``self.json_file[i]``; this method blocks until every worker has joined.
    """
    # One piece file per split index, so cnt + 1 workers in total.
    num_threads = self.cnt + 1
    threads = [None] * num_threads
    self.thread_cnt = 0
    self.thread_num = num_threads
    self.starttime = record_start_time("[Threads Process]")

    # Start one worker per piece file.
    # NOTE: banner previously said "parseProcessLarge" — a copy-paste error.
    print("------------ parseFilesThread ----------------")
    for i in range(num_threads):
        threads[i] = threadParseWorker(source_filename=self.seperate_file_name[i],
                                       all_lines=self.source_lines,
                                       json_file_name=self.json_file[i],
                                       save_path=self.save_path,
                                       max_conn=num_threads + 1)
        threads[i].start()

    # Block until every worker has finished before reporting completion.
    for i in range(num_threads):
        threads[i].join()

    print("--------------------------------------------------")
    print("---------------All threads finished!--------------")