Example #1
    def determine_shard_size(self, file_size, accumulator):
        # Based on <https://github.com/aleitner/shard-size-calculator/blob/master/src/shard_size.c>
        if file_size <= 0:
            return 0

        logger.debug(accumulator)

        # Determine how many multiples to step back from the accumulator.
        hops = max(accumulator - self.SHARD_MULTIPLES_BACK, 0)

        byte_multiple = self.shard_size(accumulator)
        check = file_size / byte_multiple
        if 0 < check <= 1:
            # Step back until the shard size fits under the cap.
            while hops > 0 and self.shard_size(hops) > self.MAX_SHARD_SIZE:
                hops -= 1
            return self.shard_size(hops)

        # Maximum of 2 ** 41 * 8 * 1024 * 1024 bytes.
        if accumulator > 41:
            return 0

        # Fall through to the next multiple; this recursion mirrors
        # shard_size.c and was missing from the original snippet.
        return self.determine_shard_size(file_size, accumulator + 1)
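The helpers the method relies on are not shown above. A minimal sketch, assuming shard_size(n) returns 8 MiB * 2**n (as the "2 ** 41 * 8 * 1024 * 1024" comment suggests) and placeholder values for the two class constants:

    class ShardSizeCalculator:
        SHARD_MULTIPLES_BACK = 5        # assumed value
        MAX_SHARD_SIZE = 4 * 1024 ** 3  # assumed cap of 4 GiB

        def shard_size(self, hops):
            # Power-of-two multiples of 8 MiB: 8 MiB, 16 MiB, 32 MiB, ...
            return 8 * 1024 * 1024 * (2 ** hops)

    # determine_shard_size(file_size, 0) then walks these multiples
    # upward until one is at least as large as file_size.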
Example #2
    def check_folder(self):
        """
        Check the directory layout and create whatever is missing.
        """
        # select_data directory
        if not os.path.exists(const.select_data_root_path):
            os.makedirs(const.select_data_root_path)
            logger.info('mkdir %s', const.select_data_root_path)
        logger.debug('select_data directory check finished!')
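On Python 3.2 and later, the existence check and the creation can be collapsed into a single race-free call; a one-line sketch using the same constant as above:

    os.makedirs(const.select_data_root_path, exist_ok=True)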
Example #3
    def join_shards(self, shards_filepath, pattern, destination_file_path):
        # Based on <http://code.activestate.com/recipes/224800-simple-file-splittercombiner-module/>
        import re

        logger.info('Creating file %s', destination_file_path)

        input_directory, bname_input = os.path.split(shards_filepath)
        output_directory, bname = os.path.split(destination_file_path)

        # Escape characters such as +, ., [ and ] so re does not treat
        # them as metacharacters; chunks are named <basename>-<number>.
        chunk_re = re.compile(re.escape(bname_input) + r'-[0-9]+$')

        chunkfiles = []
        for f in os.listdir(input_directory):
            logger.debug(f)
            if chunk_re.match(f):
                chunkfiles.append(f)

        logger.info('Number of chunks: %d', len(chunkfiles))
        # Join the chunks in numeric order; this key replaces the
        # Python 2 cmp-style sort of the original snippet.
        chunkfiles.sort(key=lambda name: int(name.rsplit('-', 1)[1]))
        logger.info(chunkfiles)

        data = b''
        for f in chunkfiles:
            chunk_path = os.path.join(input_directory, f)
            try:
                logger.info('Appending chunk %s', chunk_path)
                with open(chunk_path, 'rb') as chunk_file:
                    data += chunk_file.read()
                logger.info('%s (input directory)', chunk_path)
            except (OSError, IOError, EOFError) as e:
                logger.error(e)
                continue

        try:
            logger.info('%s (output directory)', output_directory)
            with open(os.path.join(output_directory, bname), 'wb') as out_file:
                out_file.write(data)
        except (OSError, IOError, EOFError) as e:
            raise ShardingException(str(e))

        logger.info('Wrote file %s', bname)
        return 1
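A usage sketch with hypothetical paths: chunk files named myfile.bin-0, myfile.bin-1, ... in /tmp/shards are reassembled into /tmp/out/myfile.bin. Here sharder stands in for whatever object exposes join_shards(); note that the pattern argument is never used by the method:

    sharder.join_shards('/tmp/shards/myfile.bin', None, '/tmp/out/myfile.bin')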
Example #4
    def get_cal_start_date(self):
        """
        Get the start date of the trading-calendar file; starting from
        2005-01-01 is sufficient.
        """
        if self.debug:
            df = pd.read_csv(const.DEBUG_DATA_STOCK_TRADE_CAL)
        else:
            df = pd.read_csv(const.ORIGIN_DATA_STOCK_TRADE_CAL)
        df = df.sort_values(by=['cal_date'])
        # Keep only open trading days after 2005-01-01.
        df = df[(df['cal_date'] > 20050101) & (df['is_open'] > 0)]
        start_date = df['cal_date'].values[0]
        logger.debug(start_date)
        return str(start_date)
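The filtering step can be checked against a toy frame with the two columns the CSV is assumed to contain, an integer cal_date (YYYYMMDD) and an is_open flag:

    import pandas as pd

    df = pd.DataFrame({'cal_date': [20041231, 20050103, 20050104],
                       'is_open':  [1, 0, 1]})
    df = df.sort_values(by=['cal_date'])
    df = df[(df['cal_date'] > 20050101) & (df['is_open'] > 0)]
    print(df['cal_date'].values[0])  # -> 20050104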
Example #5
    def save_client_configuration(self, settings_ui):
        tree = ET.parse(CONFIG_FILE)

        max_shard_size_enabled = (
            '1' if settings_ui.max_shard_size_enabled_checkBox.isChecked()
            else '0')

        # Map every config element to the widget value that backs it.
        values = {
            'custom_max_shard_size_enabled': max_shard_size_enabled,
            'max_shard_size': settings_ui.max_shard_size.text(),
            'max_connections_onetime': settings_ui.connections_onetime.text(),
            'shard_size_unit': settings_ui.shard_size_unit.currentIndex(),
            'max_download_bandwidth':
                settings_ui.max_download_bandwidth.text(),
            'max_upload_bandwidth': settings_ui.max_upload_bandwidth.text(),
            'default_file_encryption_algorithm':
                settings_ui.default_crypto_algorithm.currentIndex(),
            'bridge_request_timeout':
                settings_ui.bridge_request_timeout.text(),
            'crypto_keys_location': settings_ui.crypto_keys_location.text(),
            'ownstorj_port': settings_ui.ownstorj_port.text(),
        }
        for tag, value in values.items():
            tree.find('.//' + tag).text = str(value)

        tree.write(CONFIG_FILE)

        custom_tmp_path = self.get_custom_temp_path()
        logger.debug(custom_tmp_path)
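A read-side sketch under the same assumption the writer makes, namely that each setting is a uniquely named element somewhere below the XML root:

    import xml.etree.ElementTree as ET

    tree = ET.parse(CONFIG_FILE)  # CONFIG_FILE as defined by the client
    max_shard_size = tree.find('.//max_shard_size').text
    shard_size_unit = int(tree.find('.//shard_size_unit').text)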
Example #6
    def logout(self):
        # TODO: logout is not implemented yet.
        logger.debug("logout() called, but not implemented")