def search_pre(self, files, task_id, file_dir, curr_file, curr_num):
    # Search for the previous chunk and merge it into the current file if it exists.
    pre_num = str(int(curr_num.split('-')[0]) - 1)
    task_id_prefix = f'{task_id}.'
    if self.aios_redis.get(f'plus_uploader:{task_id}:{pre_num}'):
        pre_file = f'{task_id_prefix}{pre_num}'
        if pre_file in files:
            # The previous chunk is still a standalone part file.
            self.aios_print(f'Merging files (A): [{pre_file}] <= [{curr_file}]')
            new_file = self.zip_region(
                f'{task_id_prefix}{pre_num}-{curr_file.replace(task_id_prefix, "")}')
            self.merge_part_files(file_dir, pre_file, curr_file, new_file)
            curr_file = new_file
            _.remove(files, lambda x: x == pre_file)
        else:
            # The previous chunk has already been merged into a range file, e.g. "<task_id>.2-3".
            pre_file = _.find(files, lambda x: re.match(rf'.*\.\d+-{pre_num}', x))
            if pre_file:
                self.aios_print(f'Merging files (B): [{pre_file}] <= [{curr_file}]')
                pre_num_joint = pre_file.replace(task_id_prefix, '')
                new_file = self.zip_region(
                    f'{task_id_prefix}{pre_num_joint}-{curr_file.replace(task_id_prefix, "")}')
                self.merge_part_files(file_dir, pre_file, curr_file, new_file)
                curr_file = new_file
                _.remove(files, lambda x: x == pre_file)
    return curr_file
def search_next(self, files, task_id, file_dir, curr_file, curr_num):
    # Search for the next chunk and merge it into the current file if it exists.
    next_num = str(int(curr_num.split('-')[-1]) + 1)
    task_id_prefix = f'{task_id}.'
    if self.aios_redis.get(f'plus_uploader:{task_id}:{next_num}'):
        next_file = f'{task_id_prefix}{next_num}'
        if next_file in files:
            # The next chunk is still a standalone part file.
            self.aios_print(f'Merging files (A): [{curr_file}] => [{next_file}]')
            new_file = self.zip_region(
                f'{task_id_prefix}{curr_file.replace(task_id_prefix, "")}-{next_num}')
            self.merge_part_files(file_dir, curr_file, next_file, new_file)
            curr_file = new_file
            _.remove(files, lambda x: x == next_file)
        else:
            # The next chunk has already been merged into a range file, e.g. "<task_id>.5-6".
            next_file = _.find(files, lambda x: re.match(rf'.*\.{next_num}-\d+', x))
            if next_file:
                self.aios_print(f'Merging files (B): [{curr_file}] => [{next_file}]')
                next_num_joint = next_file.replace(task_id_prefix, '')
                new_file = self.zip_region(
                    f'{task_id_prefix}{curr_file.replace(task_id_prefix, "")}-{next_num_joint}')
                self.merge_part_files(file_dir, curr_file, next_file, new_file)
                curr_file = new_file
                _.remove(files, lambda x: x == next_file)
    return curr_file
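# --- Hedged usage sketch (not part of the original module) ----------------------
# Illustrates the part-file naming convention that search_pre/search_next rely on:
# standalone chunks are named "<task_id>.<n>" and already-merged ranges
# "<task_id>.<a>-<b>". The helper _range_name below only mimics what zip_region is
# assumed to return; the sample file names and task id are invented.
import re
import pydash as _


def _range_name(task_id, first, last):
    # Assumed shape of a merged-range file name.
    return f'{task_id}.{first}-{last}'


files = ['12345.2-3', '12345.4', '12345.6-7']
task_id = '12345'
curr_num = '4'

# Chunk 3 is no longer standalone, so locate the range file that ends in "-3",
# exactly as the regex branch of search_pre does.
pre_num = str(int(curr_num.split('-')[0]) - 1)
pre_file = _.find(files, lambda x: re.match(rf'.*\.\d+-{pre_num}', x))
assert pre_file == '12345.2-3'

# Merging "12345.2-3" with "12345.4" would produce the range file "12345.2-4".
first_num = pre_file.replace(f'{task_id}.', '').split('-')[0]
assert _range_name(task_id, first_num, curr_num) == '12345.2-4'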
def _flatten_conf(conf, new_conf):
    # Replace any existing entries that share a 'conf' key with the incoming ones,
    # then append the whole new_conf list.
    for _new_conf in new_conf:
        pydash.remove(conf, lambda x: x['conf'] == _new_conf['conf'])
    conf.extend(new_conf)


# if __name__ == "__main__":
#     test_case_collector = TestCaseCollector()
#     # create_client()
#     test_case_collector.store_tests()
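# --- Hedged usage sketch (not part of the original module) ----------------------
# Shows the de-duplication behaviour of _flatten_conf: entries in new_conf replace
# existing entries that share the same 'conf' key, anything else is appended.
# The sample configuration dicts are invented for illustration.
import pydash

conf = [{'conf': 'timeout', 'value': 30}, {'conf': 'retries', 'value': 3}]
new_conf = [{'conf': 'timeout', 'value': 60}, {'conf': 'verbose', 'value': True}]

_flatten_conf(conf, new_conf)
assert pydash.find(conf, {'conf': 'timeout'})['value'] == 60   # replaced
assert pydash.find(conf, {'conf': 'retries'})['value'] == 3    # kept
assert len(conf) == 3                                          # 'verbose' appended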
def multi_process_handler(self, msg_list):
    '''Handle file-merge messages with multiple processes.'''
    try:
        from app import aios_redis
        self.aios_redis = aios_redis
        # Start a thread that periodically flushes the cache to the database.
        # self.start_thread_save_db(msg_list)

        # Separate the chunks that need merging from those that do not;
        # single-chunk files only need to be renamed.
        single_chunk_files = _.remove(
            msg_list,
            lambda x: x.get('curr_chunk') == 1 and x.get('total_chunks') == 1)
        multi_chunk_files = msg_list
        succ_list = []
        err_list = []
        sp_file_handler = SubprocessFileHandler()
        if len(single_chunk_files):
            _succ_list, _err_list = sp_file_handler.single_file_handler(
                single_chunk_files)
            succ_list.extend(_succ_list)
            err_list.extend(_err_list)
        if len(multi_chunk_files):
            default_limit = 4
            cpus = min(len(multi_chunk_files), cpu_count(), default_limit)
            with Pool(processes=cpus) as pool:
                result = pool.map(sp_file_handler.multi_file_handler,
                                  multi_chunk_files)
            succ_list.extend(
                _.chain(result).filter_(lambda x: x[0] is not None)
                .map_(lambda x: x[0]).value())
            err_list.extend(
                _.chain(result).filter_(lambda x: x[1] is not None)
                .map_(lambda x: x[1]).value())
        return succ_list, err_list
    except Exception as err:
        print('❌multi_process_handler❌', err)
        return [], [{
            'file_key': i['file_key'],
            'curr_chunk': i['curr_chunk']
        } for i in msg_list]
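# --- Hedged usage sketch (not part of the original module) ----------------------
# multi_process_handler assumes two conventions shown here with invented data:
# 1) pydash.remove partitions msg_list in place, returning the single-chunk
#    messages and leaving the multi-chunk ones behind;
# 2) each pool worker returns a (succ, err) tuple with one side set to None,
#    which the pydash chain splits into success and error lists.
import pydash as _

msg_list = [
    {'file_key': 'a', 'curr_chunk': 1, 'total_chunks': 1},   # single chunk: rename only
    {'file_key': 'b', 'curr_chunk': 2, 'total_chunks': 5},   # multi chunk: needs merging
]
single = _.remove(msg_list,
                  lambda x: x.get('curr_chunk') == 1 and x.get('total_chunks') == 1)
assert [m['file_key'] for m in single] == ['a']
assert [m['file_key'] for m in msg_list] == ['b']

result = [('task1.file', None), (None, {'file_key': 'b', 'curr_chunk': 2})]
succ_list = _.chain(result).filter_(lambda x: x[0] is not None).map_(lambda x: x[0]).value()
err_list = _.chain(result).filter_(lambda x: x[1] is not None).map_(lambda x: x[1]).value()
assert succ_list == ['task1.file']
assert err_list == [{'file_key': 'b', 'curr_chunk': 2}]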
def file_upload_async():
    print("start", request.form)
    task_id = request.form.get('task_id')
    file_cache = FileCache(task_id)
    key = None
    fileHandler = FileHandler()
    file = request.files.get('file')
    sub_dir = request.form.get('sub_dir') or "/upload"
    # chunkNumber starts at 1
    chunk = request.form.get('chunkNumber', type=int, default=1)
    totalChunks = request.form.get('totalChunks', type=int)
    if not file:
        return bad_request('file')
    if not sub_dir:
        return bad_request('sub_dir')
    if not task_id:
        return bad_request('task_id')
    if not totalChunks:
        return bad_request('totalChunks')
    try:
        user_id = g.user_id if hasattr(g, 'user_id') else 0
        absolute_dir_path = os.path.join(Utils.get_upload_path(), str(user_id),
                                         str(task_id))
        Utils.try_makedirs(absolute_dir_path)
        if file_cache.get_chunk_status(chunk):
            return standard_expection(
                f'Chunk {chunk} of file {task_id} has already been uploaded; '
                f'please do not repeat the operation!')
        tenant_id = g.tenant_id if hasattr(g, 'tenant_id') else 0
        exist_task = TaskModel.query.filter(
            TaskModel.tenant_id == tenant_id,
            TaskModel.task_id == task_id).first()
        if exist_task and exist_task.status == TASK_STATUS_MERGED:
            return standard_expection(
                f'File {task_id} has already been fully uploaded; '
                f'please do not repeat the operation!')
        file.save(os.path.join(absolute_dir_path, f'{task_id}{chunk}'))
        file_cache.set_chunk_status(chunk)
        key = task_id
        fileHandler.log_print(key, chunk, f'{chunk}/{totalChunks}')
        # On the first chunk, start a background thread that watches chunk status
        # and merges the parts in parallel.
        if chunk == 1:
            target_filename = request.form.get('filename')
            merged_file = os.path.join(absolute_dir_path, target_filename)
            part_file = os.path.join(absolute_dir_path, key)
            args = (key, totalChunks, part_file, merged_file, tenant_id, user_id)
            threading.Thread(target=fileHandler.porter_running,
                             args=args,
                             daemon=True).start()
        while not file_cache.lock():
            # print(datetime.datetime.now(), f"{task_id} set lock waiting chunk {chunk}")
            fileHandler.log_print(key, chunk, 'task waiting')
            time.sleep(0.2)
        else:
            # while/else: the else body runs once the task lock has been acquired.
            # print(datetime.datetime.now(), f"{task_id} set lock success chunk {chunk}")
            # ============================ exclusive section ============================
            # Initialise the chunk numbers still to be handled; each request picks its
            # own number off the list, and the request holding the last chunk polls the
            # background thread for merge completion.
            file_cache.ready_chunks(totalChunks)
            # Remove the current chunk number from the pending list.
            chunks_values = file_cache.get_ready_chunks()
            _.remove(chunks_values, lambda x: x == chunk)
            file_cache.set_ready_chunks(chunks_values)
            # Release the per-task lock so intermediate chunks can respond to the client promptly.
            file_cache.release_lock()
            # ========================== exclusive section END ==========================
            # Last chunk
            if len(chunks_values) == 0:
                task = None
                # Poll until every chunk has been recorded or the cache expiry is reached.
                is_completed = False
                start = time.time()
                while is_completed is False and time.time() - start <= current_app.config['REDIS_CACHE_EXPIRE_FILE']:
                    time.sleep(0.5)
                    counter = file_cache.get_counter()
                    if counter is None:
                        # The background merge thread may not have created the counter key yet.
                        continue
                    if isinstance(counter, bytes):
                        counter = int(counter)
                    print(datetime.datetime.now(),
                          f'#### counter {counter}, total chunks {totalChunks}')
                    # Expected chunk numbers: [1..totalChunks]; chunks counted so far: [1..counter].
                    expect_partitions = sorted([i + 1 for i in range(totalChunks)])
                    current_partitions = sorted([i + 1 for i in range(counter)])
                    is_completed = expect_partitions == current_partitions
                    fileHandler.log_print(key, chunk,
                                          'is_completed:{}'.format(is_completed))
                else:
                    # When several clients upload at the same time, porter_running may lag,
                    # so wait a little for the completed-task link to appear.
                    wait_start = time.time()
                    task_link = file_cache.get_task_completed()
                    print(datetime.datetime.now(),
                          f'#### task link {task_link}, wait_start: {wait_start}')
                    while is_completed is True and time.time() - wait_start <= 2000 and task_link is None:
                        time.sleep(0.5)
                        task_link = file_cache.get_task_completed()
                        # print(datetime.datetime.now(), '#### task link', task_link)
                    if task_link is None:
                        return standard_expection('File check timed out.')
                    return standard_response(task_link, 200)
            else:
                # Intermediate chunk: return straight away.
                fileHandler.log_print(key, chunk, 'is_completed:False')
                print(datetime.datetime.now(), "is_completed:False", key, chunk)
                return standard_response(str(chunk), 200)
    except Exception as err:
        print("error: ", err)
        import traceback
        traceback.print_exc()
        return standard_expection('Upload failed!')
    finally:
        file_cache.release_lock()
        print("end", request.form)
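# --- Hedged client-side sketch (not part of the original module) ----------------
# A minimal example of how a client could drive file_upload_async. The endpoint
# URL and chunk size are assumptions; the form fields (task_id, sub_dir,
# chunkNumber, totalChunks, filename, file) mirror the ones the handler reads.
import math
import os
import uuid
import requests

UPLOAD_URL = 'http://localhost:5000/file/upload'  # assumed route
CHUNK_SIZE = 5 * 1024 * 1024                      # assumed 5 MiB per chunk


def upload_in_chunks(path):
    task_id = uuid.uuid4().hex
    total = max(1, math.ceil(os.path.getsize(path) / CHUNK_SIZE))
    with open(path, 'rb') as fh:
        for chunk_no in range(1, total + 1):
            data = {
                'task_id': task_id,
                'sub_dir': '/upload',
                'chunkNumber': chunk_no,
                'totalChunks': total,
                'filename': os.path.basename(path),
            }
            files = {'file': (os.path.basename(path), fh.read(CHUNK_SIZE))}
            resp = requests.post(UPLOAD_URL, data=data, files=files)
            resp.raise_for_status()
    # The response to the last chunk is expected to carry the merged-file link.
    return resp.json()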
def test_remove(case, filter_by, expected):
    original = list(case)
    assert _.remove(case, filter_by) == expected
    assert set(case).intersection(expected) == set()
    assert set(original) == set(case + expected)
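# --- Hedged parametrisation sketch (not part of the original test module) -------
# test_remove above expects (case, filter_by, expected) to be supplied by the test
# harness; a pytest parametrisation like the one below (with invented values)
# would exercise it and documents pydash.remove's contract.
import pytest
import pydash as _


@pytest.mark.parametrize('case,filter_by,expected', [
    ([1, 2, 3, 4], lambda x: x % 2 == 0, [2, 4]),
    (['a', 'b', 'c'], lambda x: x == 'b', ['b']),
])
def test_remove_parametrized(case, filter_by, expected):
    original = list(case)
    assert _.remove(case, filter_by) == expected        # removed items are returned
    assert set(case).intersection(expected) == set()    # and are gone from `case`
    assert set(original) == set(case + expected)        # nothing is lost overall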