def handle(self, thread_id):
    """Process queued download results on behalf of one handler thread.

    The loop keeps running while ``self.threadNums['download']`` is non-zero
    or the data queue still holds items.  Each item returned by
    ``self.__getData()`` is run through a ``handle.Handle`` job; the job's
    elapsed time is added to the running total, the processed-item counter
    is incremented and a status line is recorded.

    When no item is available the thread sleeps one second and retries.
    Once at most one download thread is left and the queue is empty, up to
    ten such idle rounds are tolerated before the loop gives up.

    The per-thread status and the ``'handle'`` thread counter are updated in
    a ``finally`` clause, so they are released even when fetching or
    processing an item raises.  The exception itself still propagates.

    Args:
        thread_id: Key of this thread in ``self.status``; also used as the
            prefix of the recorded status line.
    """
    most_wait = 10
    try:
        while self.threadNums['download'] or not self.__dataQueue.empty():
            data = self.__getData()
            if not data:
                # NOTE(review): nesting was reconstructed from a flattened
                # source - confirm the countdown is meant to tick only while
                # the pipeline is draining (<= 1 downloader, empty queue).
                if (self.threadNums['download'] <= 1
                        and self.__dataQueue.empty()):
                    if most_wait <= 0:
                        break
                    most_wait -= 1
                time.sleep(1)
                continue

            o_handle = handle.Handle(self.__config, data, self.__com,
                                     self.__comLock)
            o_handle.setThreadId(thread_id)
            o_handle.run()

            self.__handleTime += o_handle.getUsedTime()
            self.__handleCount += 1
            funcUtil.recordStatus(
                self.__id, '%s uri: %s use time: %.2f size: %d' %
                (thread_id, str(o_handle.getUri()), o_handle.getUsedTime(),
                 self.__dataQueue.qsize()))
    finally:
        # Always release the bookkeeping: other loops in this class use the
        # thread counters as their termination condition.
        self.status[thread_id] = self.STATUS_END
        self.threadNums['handle'] -= 1
def __process(self):
    """Convert this run's temporary data files into sample pairs and save them.

    After computing the input and output domains, every file in the
    temporary data directory whose name is ``<symbol>_<run file name>`` is
    processed in turn:

    1. Each non-empty line is parsed as JSON and appended to the symbol's
       buffer in ``self.__com``; the file is then removed.  Any error in
       this step is printed and logged, and processing continues.
    2. The buffer is sorted with ``Save.sortByDate``.
    3. Each record except the last is paired with its successor: the input
       is sampled from field ``[1]`` of the record, the target from the
       following record.  The last record is paired with an all-zero target
       of length ``self.__outputNodes``.
    4. The pairs are handed to ``self.__save`` and the symbol's buffer is
       dropped from ``self.__com``.
    """

    def report(message):
        # Every progress message goes to the status store and to stdout.
        funcUtil.recordStatus(self.__id, message)
        print(message)

    self.__calculateInputDomain()
    self.__calculateOutputDomain()

    for file_name in os.listdir(self.__tmpDataDir):
        symbol = file_name.split('_')[0]
        # Skip files that do not belong to the current run.
        if file_name[len(symbol) + 1:] != self.__fileName:
            continue

        report('sampling %s data ...' % file_name)

        try:
            tmp_path = os.path.join(self.__tmpDataDir, file_name)
            with open(tmp_path, 'r') as tmp_file:
                raw_lines = tmp_file.read().split('\n')
            for raw_line in raw_lines:
                if raw_line:
                    self.__com[symbol] += json.loads(raw_line)
            os.remove(tmp_path)
        except Exception as ex:
            print(ex)
            funcUtil.write_log('getDataProcess')

        records = self.__com[symbol]
        records.sort(Save.sortByDate)
        total = len(records)

        samples = []
        for position, record in enumerate(records[:-1], 1):
            x = self.__sample(record[1], self.__inputDomain)
            report('has sample %d | %d' % (position, total))
            # NOTE(review): x is sampled from record[1] while y is sampled
            # from the whole following record - confirm this is intended.
            y = self.__sample(records[position], self.__outputDomain)
            samples.append((x, y))

        # The newest record has no successor, so it gets a zero target.
        last_x = self.__sample(records[-1][1], self.__inputDomain)
        samples.append((last_x, [0] * self.__outputNodes))

        report('finish sample %s' % file_name)
        report('start save %s data' % file_name)
        self.__save(symbol, samples)
        report('finish saving %s' % file_name)

        del self.__com[symbol]
class DispatchManager(base.BaseManager):
    """Holds the configuration, queues, locks and counters of one dispatch run."""

    def __init__(self, config, db_config):
        """Store the run settings and create the shared state.

        Args:
            config: Mapping that provides ``thread_num_of_download``,
                ``thread_num_of_handle``, ``retry_times``, ``tmp_data_dir``,
                ``save_module``, ``id``, ``start_date``, ``end_date`` and
                ``input_nodes``.
            db_config: Passed through, together with ``config``, to
                ``prepare.Prepare``.
        """
        # Settings copied out of the configuration mapping.
        self.__threadNumOfDownload = config['thread_num_of_download']
        self.__threadNumOfHandle = config['thread_num_of_handle']
        self.__retryTimes = config['retry_times']
        self.__tmpDataDir = config['tmp_data_dir']
        self.__saveModule = config['save_module']
        self.__id = config['id']

        # Suffix shared by all temporary files of this run:
        # <start_date>_<end_date>_<input_nodes>_<id>
        name_parts = [
            config['start_date'],
            config['end_date'],
            str(config['input_nodes']),
            self.__id,
        ]
        self.__fileName = '_'.join(name_parts)

        self.__config = config
        self.__dbConfig = db_config

        # One lock per shared resource.
        self.__uriLock = threading.Lock()
        self.__dataLock = threading.Lock()
        self.__fileLock = threading.Lock()
        self.__downloadThreadLock = threading.Lock()

        self.__failQueue = Queue.Queue()
        self.__failUriDict = {}
        self.__com = {}

        self.init()

        self.o_prepare = prepare.Prepare(self.__config, self.__dbConfig)

        # Accumulated timings and item counts.
        self.__curlTime = self.__curlCounts = 0
        self.__handleTime = self.__handleCount = 0

    def __prepare(self):
        """Run the preparation step and reset the per-symbol state.

        Unpacks the URI queue, data queue and symbol list returned by
        ``self.o_prepare.run()``.  For every symbol it makes sure a buffer
        exists in ``self.__com``, creates a fresh lock, and removes a
        leftover temporary file of this run if one exists.  A failure while
        removing the file is printed and logged, not raised.
        """
        funcUtil.recordStatus(self.__id, 'preparing dispatch manager ...')

        prepared = self.o_prepare.run()
        self.__uriQueue, self.__dataQueue, self.__symbolList = prepared

        self.__comLock = {}
        for symbol in self.__symbolList:
            self.__com.setdefault(symbol, [])
            self.__comLock[symbol] = threading.Lock()

            try:
                stale_path = os.path.join(self.__tmpDataDir,
                                          symbol + '_' + self.__fileName)
                if os.path.exists(stale_path):
                    os.remove(stale_path)
            except Exception as ex:
                print(ex)
                funcUtil.write_log('deleteTmp')

        funcUtil.recordStatus(self.__id,
                              'finish preparing dispatch manager ...')
def __prepare(self):
    """Run the preparation step and reset the per-symbol buffers and locks.

    The result of ``self.o_prepare.run()`` is unpacked into the URI queue,
    the data queue and the symbol list.  Each symbol then gets an empty
    buffer in ``self.__com`` (unless one is already present) and a new lock
    in ``self.__comLock``.  A temporary file left over for that symbol and
    this run is deleted; errors during deletion are printed and logged
    rather than raised.
    """
    funcUtil.recordStatus(self.__id, 'preparing dispatch manager ...')

    queues_and_symbols = self.o_prepare.run()
    (self.__uriQueue,
     self.__dataQueue,
     self.__symbolList) = queues_and_symbols

    self.__comLock = {}
    for symbol in self.__symbolList:
        # Keep a buffer that already exists; only create missing ones.
        self.__com.setdefault(symbol, [])
        self.__comLock[symbol] = threading.Lock()

        try:
            leftover = os.path.join(self.__tmpDataDir,
                                    symbol + '_' + self.__fileName)
            if os.path.exists(leftover):
                os.remove(leftover)
        except Exception as ex:
            print(ex)
            funcUtil.write_log('deleteTmp')
def download(self, thread_id):
    """Download URIs one after another until ``self.__getUri()`` yields none.

    For each URI a ``download.Download`` job is run against the data queue.
    Its elapsed time is added to the curl time total, the curl counter is
    incremented, a status line is recorded and printed, and the job's error
    queue is passed to ``self.__addFailUri``.  Any exception ends the loop
    and is printed instead of being raised.

    Args:
        thread_id: Identifier given to each job and used as the prefix of
            the status line.
    """
    try:
        while True:
            uri = self.__getUri()
            if not uri:
                break

            job = download.Download(uri, self.__dataQueue)
            job.setThreadId(thread_id)
            job.run()

            self.__curlTime += job.getUsedTime()
            self.__curlCounts += 1

            funcUtil.recordStatus(
                self.__id, '%s uri: %s use time: %.2f size: %d' %
                (thread_id, uri, job.getUsedTime(),
                 self.__uriQueue.qsize()))
            print(thread_id + ' uri: ' + uri + ' use time: ' +
                  str(job.getUsedTime()) + ' size: ' +
                  str(self.__uriQueue.qsize()))

            self.__addFailUri(uri, job.getErrorQueue())
    except Exception as ex:
        print(ex)
print 'finish sample %s' % file_name funcUtil.recordStatus(self.__id, 'start save %s data' % file_name) print 'start save %s data' % file_name self.__save(symbol, symbol_data) funcUtil.recordStatus(self.__id, 'finish saving %s' % file_name) print 'finish saving %s' % file_name del self.__com[symbol] for symbol, data in self.__com.iteritems(): self.__com[symbol].sort(Save.sortByDate) funcUtil.recordStatus(self.__id, 'sampling %s data ...' % symbol) print 'sampling %s data ...' % symbol symbol_data = [] data_list = self.__com[symbol] data_len = len(data_list) for index, value in enumerate(data_list[0:-1]): x = self.__sample(value[1], self.__inputDomain) funcUtil.recordStatus( self.__id, 'has sample %d | %d' % (index + 1, data_len)) print 'has sample %d | %d' % (index + 1, data_len) next_data = data_list[index + 1] y = self.__sample(next_data, self.__outputDomain) symbol_data.append((x, y))
def __process(self):
    """Build sliding-window samples from this run's temporary files and save them.

    Every file in the temporary data directory whose name is
    ``<symbol>_<run file name>`` is processed in turn:

    1. Each non-empty line is parsed as JSON and appended to the symbol's
       buffer in ``self.__com``; the file is then removed.  Any error in
       this step is printed and logged, and processing continues.
    2. The buffer is sorted with ``Save.sortByDate``.
    3. For each start index that leaves room for ``xDays + yDays`` records,
       the input is the ``[1][-1][1]`` value of ``xDays`` consecutive
       records, and the target is a pair: the ``[1][0][1]`` value of the
       first record after the input window and the ``[1][-1][1]`` value of
       the record ``yDays - 1`` positions later.
    4. A final sample built from the last ``xDays`` records is appended
       with the target ``(0, 0)``.
    5. The samples are handed to ``self.__save`` and the symbol's buffer is
       dropped from ``self.__com``.
    """

    def report(message):
        # Every progress message goes to the status store and to stdout.
        funcUtil.recordStatus(self.__id, message)
        print(message)

    for file_name in os.listdir(self.__tmpDataDir):
        symbol = file_name.split('_')[0]
        # Skip files that do not belong to the current run.
        if file_name[len(symbol) + 1:] != self.__fileName:
            continue

        report('sampling %s data ...' % file_name)

        try:
            tmp_path = os.path.join(self.__tmpDataDir, file_name)
            with open(tmp_path, 'r') as tmp_file:
                raw_lines = tmp_file.read().split('\n')
            for raw_line in raw_lines:
                if raw_line:
                    self.__com[symbol] += json.loads(raw_line)
            os.remove(tmp_path)
        except Exception as ex:
            print(ex)
            funcUtil.write_log('getDataProcess')

        records = self.__com[symbol]
        records.sort(Save.sortByDate)
        total = len(records)

        x_days = self.__xDays
        y_days = self.__yDays
        # Stop early enough that the target window still fits in the data.
        window_stop = 1 - x_days - y_days

        samples = []
        for index, first in enumerate(records[0:window_stop]):
            x = [first[1][-1][1]] + [
                records[index + step][1][-1][1]
                for step in range(1, x_days)
            ]
            report('has sample %d | %d' % (index + 1, total))

            target_first = records[index + x_days]
            target_last = records[index + y_days + x_days - 1]
            y = (target_first[1][0][1], target_last[1][-1][1])
            samples.append((x, y))

        # The newest window has no following records; give it a zero target.
        x_end = [records[-x_days][1][-1][1]] + [
            records[-x_days + step][1][-1][1]
            for step in range(1, x_days)
        ]
        samples.append((x_end, (0, 0)))

        report('finish sample %s' % file_name)
        report('start save %s data' % file_name)
        self.__save(symbol, samples)
        report('finish saving %s' % file_name)

        del self.__com[symbol]