Esempio n. 1
0
    def handle(self, thread_id):
        """Process queued data until downloads are done and the queue is empty.

        Every item returned by ``self.__getData()`` is given to a
        ``handle.Handle`` object together with the config, the shared
        ``self.__com`` store and its locks. After ``run()`` the time reported
        by the handler is added to ``self.__handleTime``,
        ``self.__handleCount`` is incremented and a status line is recorded.

        When no item is returned while the ``'download'`` counter is at most 1
        and the data queue is empty, the loop sleeps one second and tries
        again; after ten such waits it stops. The ten-wait budget covers the
        whole call and is not refilled once data arrives again.

        On the way out the thread is marked ``STATUS_END`` in ``self.status``
        and the ``'handle'`` counter in ``self.threadNums`` is decremented.
        This runs in a ``finally`` clause so the bookkeeping also happens when
        a handler raises; the exception itself still propagates.

        :param thread_id: identifier passed to the handler, used as the key in
            ``self.status`` and interpolated into the status message.
        """
        retries_left = 10
        try:
            while self.threadNums['download'] or not self.__dataQueue.empty():
                data = self.__getData()
                if not data:
                    if (self.threadNums['download'] <= 1
                            and self.__dataQueue.empty()):
                        if retries_left <= 0:
                            break
                        retries_left -= 1
                        time.sleep(1)
                    continue

                o_handle = handle.Handle(self.__config, data, self.__com,
                                         self.__comLock)
                o_handle.setThreadId(thread_id)
                o_handle.run()

                self.__handleTime += o_handle.getUsedTime()
                self.__handleCount += 1
                funcUtil.recordStatus(
                    self.__id, '%s  uri: %s  use time: %.2f  size: %d' %
                    (thread_id, str(o_handle.getUri()),
                     o_handle.getUsedTime(), self.__dataQueue.qsize()))
        finally:
            # Always release this thread's slot, otherwise anything waiting
            # for the 'handle' counter to drop would never see it happen.
            self.status[thread_id] = self.STATUS_END
            self.threadNums['handle'] -= 1
Esempio n. 2
0
    def __process(self):
        """Turn every temporary data file of this task into saved samples.

        The input and output domains are calculated first. Then, for each file
        in ``self.__tmpDataDir`` named ``<symbol>_<self.__fileName>``:

        1. every non-empty line is decoded as JSON and appended to
           ``self.__com[symbol]``; afterwards the file is removed;
        2. the collected records are sorted with ``Save.sortByDate``;
        3. each record except the last is paired with its successor: ``x`` is
           sampled from ``record[1]`` over the input domain, ``y`` from the
           following record over the output domain;
        4. the last record gets a ``y`` made of ``self.__outputNodes`` zeros,
           because it has no successor;
        5. the pairs go to ``self.__save`` and the symbol is removed from
           ``self.__com``.

        An error while reading a file is printed and logged, and processing
        goes on with whatever was collected so far. A symbol that ends up with
        no records is skipped rather than raising ``KeyError``/``IndexError``.
        """
        self.__calculateInputDomain()
        self.__calculateOutputDomain()

        for file_name in os.listdir(self.__tmpDataDir):
            symbol = file_name.split('_')[0]
            if file_name[len(symbol) + 1:] != self.__fileName:
                continue

            funcUtil.recordStatus(self.__id,
                                  'sampling %s data ...' % file_name)
            print('sampling %s data ...' % file_name)

            # A matching file may exist for a symbol without an in-memory
            # bucket; create it so neither the load nor the sort below can
            # fail with KeyError.
            data_list = self.__com.setdefault(symbol, [])

            try:
                tmp_path = os.path.join(self.__tmpDataDir, file_name)
                with open(tmp_path, 'r') as f:
                    for line in f.read().split('\n'):
                        if line:
                            data_list.extend(json.loads(line))

                # Only reached when the whole file was decoded.
                os.remove(tmp_path)
            except Exception as ex:
                # Best effort: keep what was loaded and carry on.
                print(ex)
                funcUtil.write_log('getDataProcess')

            if not data_list:
                # Empty or unreadable file: there is no last record to build
                # the trailing sample from.
                funcUtil.recordStatus(self.__id,
                                      'no data to sample in %s' % file_name)
                print('no data to sample in %s' % file_name)
                del self.__com[symbol]
                continue

            data_list.sort(Save.sortByDate)
            data_len = len(data_list)

            symbol_data = []
            consecutive = zip(data_list, data_list[1:])
            for index, (value, next_data) in enumerate(consecutive):
                x = self.__sample(value[1], self.__inputDomain)
                funcUtil.recordStatus(
                    self.__id, 'has sample %d | %d' % (index + 1, data_len))
                print('has sample %d | %d' % (index + 1, data_len))

                # NOTE(review): x is sampled from value[1] but y from the
                # whole next record, not next_data[1] - confirm this is
                # what __sample expects for the output domain.
                y = self.__sample(next_data, self.__outputDomain)

                symbol_data.append((x, y))

            # The newest record has no successor, so its target is all zeros.
            x_end = self.__sample(data_list[-1][1], self.__inputDomain)
            y_end = [0] * self.__outputNodes
            symbol_data.append((x_end, y_end))

            funcUtil.recordStatus(self.__id, 'finish sample %s' % file_name)
            print('finish sample %s' % file_name)

            funcUtil.recordStatus(self.__id, 'start save %s data' % file_name)
            print('start save %s data' % file_name)

            self.__save(symbol, symbol_data)

            funcUtil.recordStatus(self.__id, 'finish saving %s' % file_name)
            print('finish saving %s' % file_name)

            del self.__com[symbol]
Esempio n. 3
0
class DispatchManager(base.BaseManager):
    """Manager holding the shared state of one dispatch task.

    Construction copies the task settings out of ``config``, creates the
    locks, the failure queue and the per-symbol data store, calls
    ``self.init()`` and builds the ``prepare.Prepare`` helper. ``__prepare``
    later asks that helper for the URI queue, the data queue and the symbol
    list, and sets up a data bucket and a lock per symbol.
    """

    def __init__(self, config, db_config):
        """Store the configuration and create the shared containers.

        :param config: mapping read for the keys ``thread_num_of_download``,
            ``thread_num_of_handle``, ``retry_times``, ``tmp_data_dir``,
            ``save_module``, ``id``, ``start_date``, ``end_date`` and
            ``input_nodes``; also kept as a whole.
        :param db_config: kept as is and forwarded to ``prepare.Prepare``.
        """
        self.__threadNumOfDownload = config['thread_num_of_download']
        self.__threadNumOfHandle = config['thread_num_of_handle']
        self.__retryTimes = config['retry_times']
        self.__tmpDataDir = config['tmp_data_dir']
        self.__saveModule = config['save_module']
        self.__id = config['id']
        # <start_date>_<end_date>_<input_nodes>_<id>; prefixed with
        # "<symbol>_" it names the temporary data file of a symbol.
        self.__fileName = (config['start_date'] + '_' + config['end_date'] +
                           '_' + str(config['input_nodes']) + '_' + self.__id)

        self.__config = config
        self.__dbConfig = db_config

        self.__uriLock = threading.Lock()
        self.__dataLock = threading.Lock()
        self.__fileLock = threading.Lock()

        self.__downloadThreadLock = threading.Lock()

        self.__failQueue = Queue.Queue()
        self.__failUriDict = {}

        # symbol -> list of collected records
        self.__com = {}

        # NOTE(review): init() is not defined in this class - presumably
        # inherited from base.BaseManager; confirm.
        self.init()
        self.o_prepare = prepare.Prepare(self.__config, self.__dbConfig)

        # Accumulated download time / number of downloads.
        self.__curlTime = 0
        self.__curlCounts = 0

        # Accumulated handling time / number of handled items.
        self.__handleTime = 0
        self.__handleCount = 0

    def __prepare(self):
        """Fetch the work queues and reset the per-symbol state.

        ``self.o_prepare.run()`` supplies the URI queue, the data queue and
        the symbol list. Every listed symbol gets an empty bucket in
        ``self.__com`` (unless one exists already) and a lock in the freshly
        rebuilt ``self.__comLock``. A temporary file left over for the symbol
        is deleted; a failure to do so is printed and logged, not raised.
        """
        funcUtil.recordStatus(self.__id, 'preparing dispatch manager ...')

        self.__uriQueue, self.__dataQueue, self.__symbolList = (
            self.o_prepare.run())

        self.__comLock = {}
        for symbol in self.__symbolList:
            if symbol not in self.__com:
                self.__com[symbol] = []
            # The lock table was just rebuilt, so a symbol needs a lock even
            # when its bucket already existed before this call.
            if symbol not in self.__comLock:
                self.__comLock[symbol] = threading.Lock()

            try:
                tmp_path = os.path.join(self.__tmpDataDir,
                                        symbol + '_' + self.__fileName)
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            except Exception as ex:
                # Best effort: a stale file must not abort the preparation.
                print(ex)
                funcUtil.write_log('deleteTmp')

        funcUtil.recordStatus(self.__id,
                              'finish preparing dispatch manager ...')
Esempio n. 4
0
    def __prepare(self):
        """Fetch the work queues and reset the per-symbol state.

        ``self.o_prepare.run()`` supplies the URI queue, the data queue and
        the symbol list. Every listed symbol gets an empty bucket in
        ``self.__com`` (unless one exists already) and a lock in the freshly
        rebuilt ``self.__comLock``. A temporary file named
        ``<symbol>_<self.__fileName>`` left in ``self.__tmpDataDir`` is
        deleted; a failure to do so is printed and logged, not raised.
        """
        funcUtil.recordStatus(self.__id, 'preparing dispatch manager ...')

        self.__uriQueue, self.__dataQueue, self.__symbolList = (
            self.o_prepare.run())

        self.__comLock = {}
        for symbol in self.__symbolList:
            if symbol not in self.__com:
                self.__com[symbol] = []
            # The lock table was just rebuilt, so a symbol needs a lock even
            # when its bucket already existed before this call.
            if symbol not in self.__comLock:
                self.__comLock[symbol] = threading.Lock()

            try:
                tmp_path = os.path.join(self.__tmpDataDir,
                                        symbol + '_' + self.__fileName)
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            except Exception as ex:
                # Best effort: a stale file must not abort the preparation.
                print(ex)
                funcUtil.write_log('deleteTmp')
Esempio n. 5
0
    def download(self, thread_id):
        """Download URIs one by one until ``self.__getUri()`` returns nothing.

        For each URI a ``download.Download`` object is run with the shared
        data queue. Its reported time is added to ``self.__curlTime``,
        ``self.__curlCounts`` is incremented, a status line is recorded and
        printed, and the object's error queue is passed to
        ``self.__addFailUri`` together with the URI.

        Any exception ends the loop; it is printed and not re-raised.

        :param thread_id: identifier given to the downloader and placed at the
            start of the status line.
        """
        try:
            while True:
                uri = self.__getUri()
                if not uri:
                    break

                fetcher = download.Download(uri, self.__dataQueue)
                fetcher.setThreadId(thread_id)
                fetcher.run()

                self.__curlCounts += 1
                self.__curlTime += fetcher.getUsedTime()

                status_line = '%s  uri: %s  use time: %.2f  size: %d' % (
                    thread_id, uri, fetcher.getUsedTime(),
                    self.__uriQueue.qsize())
                funcUtil.recordStatus(self.__id, status_line)

                # The console line is built separately on purpose: it uses
                # str() formatting and re-reads the time and queue size.
                console_line = thread_id + '    uri: ' + uri
                console_line = console_line + '   use time: ' + str(
                    fetcher.getUsedTime())
                console_line = console_line + '  size: ' + str(
                    self.__uriQueue.qsize())
                print(console_line)

                self.__addFailUri(uri, fetcher.getErrorQueue())
        except Exception as ex:
            print(ex)
Esempio n. 6
0
            print 'finish sample %s' % file_name

            funcUtil.recordStatus(self.__id, 'start save %s data' % file_name)
            print 'start save %s data' % file_name

            self.__save(symbol, symbol_data)

            funcUtil.recordStatus(self.__id, 'finish saving %s' % file_name)
            print 'finish saving %s' % file_name

            del self.__com[symbol]

        for symbol, data in self.__com.iteritems():
            self.__com[symbol].sort(Save.sortByDate)

            funcUtil.recordStatus(self.__id, 'sampling %s data ...' % symbol)
            print 'sampling %s data ...' % symbol

            symbol_data = []
            data_list = self.__com[symbol]
            data_len = len(data_list)
            for index, value in enumerate(data_list[0:-1]):
                x = self.__sample(value[1], self.__inputDomain)
                funcUtil.recordStatus(
                    self.__id, 'has sample %d | %d' % (index + 1, data_len))
                print 'has sample %d | %d' % (index + 1, data_len)

                next_data = data_list[index + 1]
                y = self.__sample(next_data, self.__outputDomain)

                symbol_data.append((x, y))
Esempio n. 7
0
    def __process(self):
        """Build windowed samples from every temporary data file and save them.

        For each file in ``self.__tmpDataDir`` named
        ``<symbol>_<self.__fileName>``:

        1. every non-empty line is decoded as JSON and appended to
           ``self.__com[symbol]``; afterwards the file is removed;
        2. the records are sorted with ``Save.sortByDate``;
        3. for every start index that leaves room for ``self.__xDays`` input
           records followed by ``self.__yDays`` target records, ``x`` is the
           list of ``record[1][-1][1]`` over the input records and ``y`` is
           the pair (``[1][0][1]`` of the first target record,
           ``[1][-1][1]`` of the last target record);
        4. a final sample uses the last ``self.__xDays`` records as ``x`` and
           ``(0, 0)`` as ``y``;
        5. the samples go to ``self.__save`` and the symbol is removed from
           ``self.__com``.

        NOTE(review): ``record[1]`` looks like a per-record list of entries
        whose second field is a value such as a price, making ``[1][0][1]``
        the first and ``[1][-1][1]`` the last value of the record - confirm
        against the code that writes the temporary files.

        An error while reading a file is printed and logged, and processing
        goes on with whatever was collected. A symbol with fewer records than
        one input window is skipped rather than raising
        ``KeyError``/``IndexError``.
        """
        for file_name in os.listdir(self.__tmpDataDir):
            symbol = file_name.split('_')[0]
            if file_name[len(symbol) + 1:] != self.__fileName:
                continue

            funcUtil.recordStatus(self.__id,
                                  'sampling %s data ...' % file_name)
            print('sampling %s data ...' % file_name)

            # A matching file may exist for a symbol without an in-memory
            # bucket; create it so neither the load nor the sort below can
            # fail with KeyError.
            data_list = self.__com.setdefault(symbol, [])

            try:
                tmp_path = os.path.join(self.__tmpDataDir, file_name)
                with open(tmp_path, 'r') as f:
                    for line in f.read().split('\n'):
                        if line:
                            data_list.extend(json.loads(line))

                # Only reached when the whole file was decoded.
                os.remove(tmp_path)
            except Exception as ex:
                # Best effort: keep what was loaded and carry on.
                print(ex)
                funcUtil.write_log('getDataProcess')

            data_list.sort(Save.sortByDate)
            data_len = len(data_list)
            x_days = self.__xDays
            y_days = self.__yDays

            if data_len < max(1, x_days):
                # Not even the trailing input window can be filled.
                funcUtil.recordStatus(
                    self.__id, 'not enough data to sample %s' % file_name)
                print('not enough data to sample %s' % file_name)
                del self.__com[symbol]
                continue

            def last_values(start):
                # [1][-1][1] of the record at ``start`` and of the
                # ``x_days - 1`` records after it.
                window = [data_list[start][1][-1][1]]
                for offset in range(1, x_days):
                    window.append(data_list[start + offset][1][-1][1])
                return window

            symbol_data = []
            # Records needed after a start index: the rest of the input
            # window plus the whole target window.
            horizon = x_days + y_days - 1
            for index in range(len(data_list[0:-horizon])):
                x = last_values(index)

                funcUtil.recordStatus(
                    self.__id, 'has sample %d | %d' % (index + 1, data_len))
                print('has sample %d | %d' % (index + 1, data_len))

                next_data_start = data_list[index + x_days]
                next_data_end = data_list[index + x_days + y_days - 1]
                y = (next_data_start[1][0][1], next_data_end[1][-1][1])

                symbol_data.append((x, y))

            # The most recent window has no target records yet.
            x_end = last_values(-x_days)
            y_end = (0, 0)
            symbol_data.append((x_end, y_end))

            funcUtil.recordStatus(self.__id, 'finish sample %s' % file_name)
            print('finish sample %s' % file_name)

            funcUtil.recordStatus(self.__id, 'start save %s data' % file_name)
            print('start save %s data' % file_name)

            self.__save(symbol, symbol_data)

            funcUtil.recordStatus(self.__id, 'finish saving %s' % file_name)
            print('finish saving %s' % file_name)

            del self.__com[symbol]