def determine_shard_size(self, file_size, accumulator):
    # Based on <https://github.com/aleitner/shard-size-calculator/blob/master/src/shard_size.c>
    hops = 0
    if file_size <= 0:
        return 0
    logger.debug(accumulator)

    # Determine how many multiples to step back from the accumulator.
    if (accumulator - self.SHARD_MULTIPLES_BACK) < 0:
        hops = 0
    else:
        hops = accumulator - self.SHARD_MULTIPLES_BACK

    byte_multiple = self.shard_size(accumulator)
    check = file_size / byte_multiple

    # The file fits within the current multiple: back off up to
    # SHARD_MULTIPLES_BACK hops, but never exceed MAX_SHARD_SIZE.
    if 0 < check <= 1:
        while hops > 0 and self.shard_size(hops) > self.MAX_SHARD_SIZE:
            if hops - 1 <= 0:
                hops = 0
            else:
                hops = hops - 1
        return self.shard_size(hops)

    # Maximum of 2 ** 41 * 8 * 1024 * 1024.
    if accumulator > 41:
        return 0

    # The file is still larger than the current multiple: try the next one.
    # (Without this step, as in the referenced C implementation, the function
    # would silently return None for any file larger than shard_size(accumulator).)
    return self.determine_shard_size(file_size, accumulator + 1)
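
# A minimal sketch of the shard_size() helper the method above relies on,
# assuming the 8 MiB * 2 ** hops series from the referenced shard_size.c
# (the "2 ** 41 * 8 * 1024 * 1024" comment matches that series). The name
# and placement here are hypothetical, not confirmed by this file.
def shard_size_sketch(hops):
    # One shard multiple: 8 MiB doubled `hops` times.
    return (8 * 1024 * 1024) * pow(2, hops)

# Worked example, assuming SHARD_MULTIPLES_BACK == 4 (the C default):
# a 50 MiB file first fits at accumulator 3 (64 MiB multiple); stepping
# back 4 hops floors at 0, so determine_shard_size() returns
# shard_size(0) == 8 MiB.
assert shard_size_sketch(0) == 8 * 1024 * 1024
assert shard_size_sketch(3) == 64 * 1024 * 1024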
def check_folder(self):
    """Check the directory layout and create any directory that is missing."""
    # select_data directory
    if not os.path.exists(const.select_data_root_path):
        os.makedirs(const.select_data_root_path)
        logger.info('mkdir %s' % const.select_data_root_path)
    logger.debug('select_data directory check finished!')
def join_shards(self, shards_filepath, pattern, destination_file_path):
    # Based on <http://code.activestate.com/recipes/224800-simple-file-splittercombiner-module/>
    import re
    logger.info('Creating file %s', destination_file_path)
    bname = os.path.split(destination_file_path)[1]
    bname_input = os.path.split(shards_filepath)[1]
    input_directory = os.path.split(shards_filepath)[0]
    output_directory = os.path.split(destination_file_path)[0]

    # If the name contains regex metacharacters such as +, ., [ or ],
    # escape them, otherwise re will fail to match. (The original manual
    # replace loop restarted from the unescaped name on every iteration
    # and its result was never used; re.escape() does this correctly.)
    chunkre = re.compile(re.escape(bname_input) + '-' + '[0-9]+')

    chunkfiles = []
    for f in os.listdir(str(input_directory)):
        logger.debug(f)
        if chunkre.match(f):
            chunkfiles.append(f)

    logger.info('Number of chunks: %d', len(chunkfiles))
    # Sort the chunks by their numeric suffix so they are appended in order
    # (replaces the old cmp-style self.sort_index argument, which Python 3
    # no longer accepts).
    chunkfiles.sort(key=lambda name: int(name.rsplit('-', 1)[1]))
    logger.info(chunkfiles)

    data = b''
    for f in chunkfiles:
        try:
            chunk_path = os.path.join(str(input_directory), f)
            logger.info('Appending chunk %s (input directory)', chunk_path)
            with open(chunk_path, 'rb') as chunk_file:
                data += chunk_file.read()
        except (OSError, IOError, EOFError) as e:
            logger.error(e)
            continue

    try:
        logger.info('%s (output directory)', output_directory)
        with open(os.path.join(str(output_directory), str(bname)), 'wb') as out_file:
            out_file.write(data)
    except (OSError, IOError, EOFError) as e:
        raise ShardingException(str(e))

    logger.info('Wrote file %s', bname)
    return 1
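
# join_shards() expects chunks named '<basename>-<index>' in one directory.
# A minimal companion splitter sketch that produces exactly that layout;
# the function name and the chunk_size default are assumptions, not part
# of the original module.
def split_into_shards_sketch(source_path, output_directory, chunk_size=8 * 1024 * 1024):
    bname = os.path.split(source_path)[1]
    index = 0
    with open(source_path, 'rb') as src:
        while True:
            chunk = src.read(chunk_size)
            if not chunk:
                break
            # Write '<basename>-<index>', the pattern join_shards() matches.
            with open(os.path.join(output_directory, '%s-%d' % (bname, index)), 'wb') as dst:
                dst.write(chunk)
            index += 1
    return index  # number of chunks written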
def get_cal_start_date(self):
    """Get the start date of the trading-calendar file; starting from 2005-01-01 is sufficient."""
    if self.debug:
        df = pd.read_csv(const.DEBUG_DATA_STOCK_TRADE_CAL)
    else:
        df = pd.read_csv(const.ORIGIN_DATA_STOCK_TRADE_CAL)
    df = df.sort_values(by=['cal_date'])
    # Keep only open trading days after 2005-01-01.
    df = df[(df['cal_date'] > 20050101) & (df['is_open'] > 0)]
    start_date = df['cal_date'].values[0]
    logger.debug(start_date)
    return str(start_date)
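
# A toy illustration of the filter in get_cal_start_date(). The real CSVs
# referenced via `const` are assumed to carry integer YYYYMMDD `cal_date`
# values and a 0/1 `is_open` flag (an assumption based on the comparisons
# above, not confirmed by this file).
def cal_start_date_demo():
    df = pd.DataFrame({'cal_date': [20050105, 20041231, 20050104],
                       'is_open': [0, 1, 1]})
    df = df.sort_values(by=['cal_date'])
    df = df[(df['cal_date'] > 20050101) & (df['is_open'] > 0)]
    return str(df['cal_date'].values[0])  # '20050104': the first open day kept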
def save_client_configuration(self, settings_ui):
    tree = ET.parse(CONFIG_FILE)

    # '1' when the custom max-shard-size checkbox is ticked, '0' otherwise.
    if settings_ui.max_shard_size_enabled_checkBox.isChecked():
        custom_max_shard_size_enabled_checkbox = '1'
    else:
        custom_max_shard_size_enabled_checkbox = '0'

    tree.find('.//custom_max_shard_size_enabled').text = custom_max_shard_size_enabled_checkbox
    tree.find('.//max_shard_size').text = str(settings_ui.max_shard_size.text())
    tree.find('.//max_connections_onetime').text = str(settings_ui.connections_onetime.text())
    tree.find('.//shard_size_unit').text = str(settings_ui.shard_size_unit.currentIndex())
    tree.find('.//max_download_bandwidth').text = str(settings_ui.max_download_bandwidth.text())
    tree.find('.//max_upload_bandwidth').text = str(settings_ui.max_upload_bandwidth.text())
    tree.find('.//default_file_encryption_algorithm').text = str(settings_ui.default_crypto_algorithm.currentIndex())
    tree.find('.//bridge_request_timeout').text = str(settings_ui.bridge_request_timeout.text())
    tree.find('.//crypto_keys_location').text = str(settings_ui.crypto_keys_location.text())
    tree.find('.//ownstorj_port').text = str(settings_ui.ownstorj_port.text())

    tree.write(CONFIG_FILE)

    custom_tmp_path = self.get_custom_temp_path()
    logger.debug(custom_tmp_path)
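
# A sketch of the CONFIG_FILE layout save_client_configuration() assumes,
# reconstructed from the tree.find() paths above. Only the tag names come
# from the code; the <client> root element and every value below are
# placeholders, not confirmed by this file.
def default_client_config_sketch():
    return ET.fromstring(
        '<client>'
        '<custom_max_shard_size_enabled>0</custom_max_shard_size_enabled>'
        '<max_shard_size>32</max_shard_size>'
        '<shard_size_unit>2</shard_size_unit>'
        '<max_connections_onetime>4</max_connections_onetime>'
        '<max_download_bandwidth>0</max_download_bandwidth>'
        '<max_upload_bandwidth>0</max_upload_bandwidth>'
        '<default_file_encryption_algorithm>0</default_file_encryption_algorithm>'
        '<bridge_request_timeout>30</bridge_request_timeout>'
        '<crypto_keys_location>/tmp</crypto_keys_location>'
        '<ownstorj_port>8134</ownstorj_port>'
        '</client>')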
def logout(self):
    """Log the user out. Not implemented yet."""
    logger.debug('TODO: logout() is not implemented')