def run_ocr(self, lang: str, time_start: str, time_end: str,
            conf_threshold: int, tesseract_config: str, roi=None,
            debug: bool = False, num_jobs=None) -> None:
    """Run OCR over a range of video frames and store the predictions.

    The frames between ``time_start`` and ``time_end`` are read from
    ``self.path`` and passed, together with their zero-based offset inside
    the range, to ``self._image_to_data`` through ``p_tqdm.p_imap``.  The
    outcome is stored in ``self.pred_frames`` as one ``PredictedFrame`` per
    processed frame.

    Parameters
    ----------
    lang : str
        Stored on ``self.lang``.
    time_start, time_end : str
        Range boundaries, converted to frame indices with
        ``utils.get_frame_index(..., self.fps)``.  A falsy ``time_start``
        means frame 0; a falsy ``time_end`` means ``self.num_frames``.
    conf_threshold : int
        Forwarded unchanged to every ``PredictedFrame``.
    tesseract_config : str
        Stored on ``self.tesseract_config``.
    roi : list, optional
        Stored on ``self.roi``; defaults to a fresh ``[[0, 1], [0, 1]]``.
        NOTE(review): presumably fractional bounds of the region of
        interest -- confirm against ``_image_to_data``.
    debug : bool, optional
        Stored on ``self.debug``; defaults to ``False``.
    num_jobs : int, optional
        Passed to ``p_imap`` as ``num_cpus``.  When ``None`` the argument is
        omitted so p_tqdm picks its own default.

    Raises
    ------
    ValueError
        If the end of the range lies before its start.
    """
    self.lang = lang
    self.tesseract_config = tesseract_config
    # A new list per call: a shared mutable default would leak in-place
    # modifications of ``self.roi`` into every later call.
    self.roi = [[0, 1], [0, 1]] if roi is None else roi
    self.debug = debug
    self.num_jobs = num_jobs

    ocr_start = (utils.get_frame_index(time_start, self.fps)
                 if time_start else 0)
    ocr_end = (utils.get_frame_index(time_end, self.fps)
               if time_end else self.num_frames)
    if ocr_end < ocr_start:
        raise ValueError('time_start is later than time_end')
    num_ocr_frames = ocr_end - ocr_start

    # get frames from ocr_start to ocr_end
    with Capture(self.path) as v:
        v.set(cv2.CAP_PROP_POS_FRAMES, ocr_start)
        # Lazy generator: frames are only read while the capture is open,
        # so the OCR below has to stay inside this ``with`` block.
        frames = (v.read()[1] for _ in range(num_ocr_frames))

        # perform ocr to frames in parallel
        pool_options = {} if num_jobs is None else {'num_cpus': num_jobs}
        it_ocr = p_tqdm.p_imap(self._image_to_data, range(num_ocr_frames),
                               frames, **pool_options)
        self.pred_frames = [
            PredictedFrame(i + ocr_start, data, conf_threshold)
            for i, data in enumerate(it_ocr)
        ]
def store_img(img_list, f_format, path):
    """Download every entry of ``img_list`` and save the results in ``path``.

    Each entry is handed to ``download_img`` through ``p_imap``; the value
    that comes back is written in binary mode to
    ``<path>/<index>.<f_format>``, where ``index`` is the position of the
    entry in ``img_list``.  Files that already exist are left untouched.

    Parameters
    ----------
    img_list : iterable
        Items passed one by one to ``download_img``.
    f_format : str
        File extension (without the dot) used for the saved files.
    path : str
        Target directory; created, including missing parents, if absent.

    Notes
    -----
    Any exception raised while downloading or writing is caught and
    reported on stdout; files written before the failure are kept.
    """
    # ``makedirs(exist_ok=True)`` also handles nested paths and avoids the
    # race between an existence check and the directory creation.
    os.makedirs(path, exist_ok=True)

    # Download and store all image files.
    try:
        downloaded = p_imap(download_img, img_list)
        for index, item in enumerate(downloaded):
            content = f'{path}/{index}.{f_format}'
            if not os.path.exists(content):
                with open(content, 'wb') as file:
                    file.write(item)
    except Exception as e:
        print(f'影像下載失敗,原因:{e}')
def build_BP_mat(api_sets, num_apis):
    """Build a symmetric boolean sparse matrix of co-occurring API pairs.

    For every collection in ``api_sets`` all 2-combinations of its members
    are generated (in parallel through ``p_imap``).  Entry ``(a, b)`` of the
    result is ``True`` when ``a`` and ``b`` occur together in at least one
    collection; the diagonal is always ``True``.

    Parameters
    ----------
    api_sets : iterable of sequences
        Collections of API identifiers.  They are used directly as matrix
        indices, so they are presumably integers in ``[0, num_apis)`` --
        verify against the caller.
    num_apis : int
        Side length of the square matrix.

    Returns
    -------
    scipy sparse matrix
        Boolean matrix of shape ``(num_apis, num_apis)``.
    """
    def comb_func(api_list):
        # ``reshape(-1, 2)`` keeps the (n_pairs, 2) layout even when the
        # collection has fewer than two members; without it the empty
        # result is 1-D and the column indexing below raises IndexError.
        return np.array(list(combinations(api_list, r=2))).reshape(-1, 2)

    row = []
    col = []
    for combos in p_imap(comb_func, api_sets):
        row.extend(combos[:, 0])
        col.extend(combos[:, 1])

    mat = sparse.csr_matrix(([True] * len(row), (row, col)),
                            shape=(num_apis, num_apis), dtype=bool)
    # The index lists can be large; release them before the matrix grows.
    del row, col

    mat.setdiag(True)
    # Pairs are only stored as (a, b); adding the transpose mirrors them.
    mat += mat.T
    return mat
def evaluate_parallel(self, func, *args, nprocs=None, **kwargs):
    """Evaluate model locally in parallel.

    All detected processors will be used if `nprocs` is not specified.

    Parameters
    ----------
    func : function,
        Model, or function that wraps a model, to be run in parallel.
        The provided function needs to accept a numpy array of inputs as
        its first parameter and must return a numpy array of results.
    nprocs : int,
        Number of processors to use.
        Uses all available if not specified.
    *args : list,
        Additional arguments to be passed to `func`
    **kwargs : dict,
        Additional keyword arguments passed to `func`

    Returns
    -------
    self : ProblemSpec object

    Raises
    ------
    RuntimeError
        If no samples have been generated yet.
    """
    warnings.warn("This is an experimental feature and may not work.")

    if self._samples is None:
        raise RuntimeError("Sampling not yet conducted")

    if nprocs is None:
        nprocs = cpu_count()
    # Normalise once so the chunk count and the pool size always agree.
    nprocs = int(nprocs)

    # Create wrapped partial function to allow passing of additional args
    tmp_f = self._wrap_func(func, *args, **kwargs)

    # Split into even chunks
    chunks = np.array_split(self._samples, nprocs, axis=0)

    if ptqdm_available:
        # Display progress bar if available.  p_imap yields lazily, so it
        # is materialised here to hand the same type (a list) to
        # ``_collect_results`` as the fallback branch does.
        res = list(p_imap(tmp_f, chunks, num_cpus=nprocs))
    else:
        with Pool(nprocs) as pool:
            res = list(pool.imap(tmp_f, chunks))

    self._results = self._collect_results(res)

    return self
def mp_mugenerate_voted_list(target_dir, weight=None):
    """Apply ``mp_vote_img_weight`` to every entry of ``test_list`` in parallel.

    Each entry of the module-level ``test_list`` is passed to
    ``mp_vote_img_weight`` together with ``target_dir`` and ``weight``.
    The values it returns are discarded, so the call only matters for
    whatever ``mp_vote_img_weight`` does internally.

    Parameters
    ----------
    target_dir
        Forwarded to ``mp_vote_img_weight`` as ``target_dir``.
    weight : optional
        Forwarded to ``mp_vote_img_weight`` as ``weight``.

    Returns
    -------
    None
    """
    vote = partial(mp_vote_img_weight, target_dir=target_dir, weight=weight)
    iterator = p_imap(vote, test_list, position=0, leave=True, ncols=100,
                      dynamic_ncols=False)
    # p_imap is lazy: the iterator has to be drained for the work (and the
    # progress bar) to run to completion.
    for _ in iterator:
        pass
def compute(predictions, references, rouge_types=None, use_aggregator=True,
            use_parallel=False, show_progress=False):
    """Score ``predictions`` against ``references`` with ROUGE.

    Parameters
    ----------
    predictions, references : sequence of str
        Candidate and reference texts, paired by position.  The scorer is
        called as ``scorer.score(reference, prediction)``.
    rouge_types : list of str, optional
        ROUGE variants to compute; defaults to ``['rouge1']``.
    use_aggregator : bool
        When true, every score is fed to a ``BootstrapAggregator`` and its
        ``aggregate()`` result is returned.
    use_parallel : bool
        Score the pairs through ``p_imap``.  Only honoured when
        ``use_aggregator`` is false; otherwise scoring is sequential.
    show_progress : bool
        Wrap the sequential loop in a ``tqdm`` progress bar.

    Returns
    -------
    dict
        With ``use_aggregator`` the aggregator's result; otherwise a dict
        mapping every score key to the list of per-pair scores, in input
        order.  For empty input without aggregation each requested rouge
        type maps to an empty list.
    """
    if rouge_types is None:
        rouge_types = ['rouge1']

    scorer = rouge_scorer.RougeScorer(rouge_types=rouge_types,
                                      use_stemmer=False)
    aggregator = (rouge_scorer.scoring.BootstrapAggregator()
                  if use_aggregator else None)

    scores = []
    if not use_aggregator and use_parallel:
        scores = list(
            p_imap(lambda x: scorer.score(x[0], x[1]),
                   list(zip(references, predictions))))
    else:
        # Index-based on purpose: a ``predictions`` shorter than
        # ``references`` still fails loudly instead of being truncated.
        indices = range(len(references))
        if show_progress:
            indices = tqdm(indices)
        for i in indices:
            score = scorer.score(references[i], predictions[i])
            if use_aggregator:
                aggregator.add_scores(score)
            else:
                scores.append(score)

    if use_aggregator:
        return aggregator.aggregate()

    if not scores:
        # Nothing was scored, so there is no first score to take the keys
        # from; RougeScorer keys its results by rouge type.
        return {rouge_type: [] for rouge_type in rouge_types}

    return {key: [score[key] for score in scores] for key in scores[0]}
def extract_features(self):
    """Extract features for all audio files and write them to the feature file.

    The unique file names of the first list returned by
    ``self.meta_data.file_list()`` are processed in parallel by
    ``self._extraction_job``.  Every job result is a mapping from file name
    to feature; each feature is appended to the HDF file
    ``self.feat_file`` under the base name of its file.
    """
    print('[INFO] Extract features from all audio files')
    all_lists = self.meta_data.file_list()
    # De-duplicate; note that the set makes the processing order arbitrary.
    fnames = list(set(all_lists[0]))

    # prepare extractor
    feat_type = self.config['features']['type']
    extractor = F.prepare_extractor(feats=feat_type,
                                    params=self.config['features'])

    writer = io.HDFWriter(file_name=self.feat_file)
    try:
        iterator = p_imap(
            lambda fn: self._extraction_job(fn, extractor,
                                            self.meta_data.data_dir),
            fnames)
        for result in iterator:
            for fn, feat in result.items():
                fid = path.basename(fn)
                writer.append(file_id=fid, feat=feat)
    finally:
        # Close the writer even if a job or an append fails, so the file
        # handle is not left open.
        writer.close()

    print('Files processed: %d' % len(fnames))
raw_target = ' '.join( paragraph_toks_from_html(resolve_course(target_note_str))) if raw_target in seen_targets: print('Duplicate hospital course. MRN={}. Account={}.'.format( mrn, account)) continue seen_targets.add(raw_target) examples.append( (mrn, account, len(source_note_str), len(target_note_str), source_note_str, target_note_str)) if len(examples) > 0: df = pd.DataFrame(examples, columns=[ 'mrn', 'account', 'source_len', 'target_len', 'source_str', 'target_str' ]) df.to_csv(examples_fn, index=False) return 1 return 0 if __name__ == '__main__': mrn_status_df, mrn_valid_idxs, mrns = get_mrn_status_df('valid_account') n = len(mrns) print('Processing {} mrns'.format(n)) statuses = list(p_imap(generate_examples, mrns, num_cpus=0.8)) print('{} out of {} are valid'.format(sum(statuses), len(statuses))) update_mrn_status_df(mrn_status_df, list(statuses), mrn_valid_idxs, 'valid_example')
def process_image(row):
    """Read the DICOM image referenced by one manifest row.

    Parameters
    ----------
    row : tuple
        One row from ``manifest.itertuples()``: ``row[1]`` is the image ID
        and ``row[2:]`` holds the remaining columns (the labels).

    Returns
    -------
    tuple
        ``(image_id, pixel_array, labels)`` on success, or
        ``(None, None, None)`` when the file cannot be read or decoded.
    """
    try:
        dcm = pydicom.dcmread(f"../../data/raw/rsna-intracranial-hemorrhage-detection/stage_2_train/{row[1]}.dcm")
        return row[1], dcm.pixel_array, row[2:]
    except Exception:
        # Best effort: unreadable images are skipped by the caller.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit working.
        return None, None, None


# Transforming the manifest such that each row is one image and each column is a label.
manifest = pd.read_csv("../../data/interim/manifest.csv")
# The raw ID has the form "<image id>_<subtype>"; split on the last underscore.
split_id = manifest["ID"].str.rsplit('_', n=1, expand=True)
manifest["ID"] = split_id[0]
manifest["Subtype"] = split_id[1]
manifest = manifest.pivot(index="ID", columns="Subtype", values="Label").reset_index().rename_axis(None, axis=1)

processed_images = p_imap(process_image, list(manifest.itertuples()), num_cpus=cores)

ids = []
labels = []
# ``image_id`` rather than ``id`` so the builtin is not shadowed at module level.
for image_id, pixels, labels_ in processed_images:
    # Rarely pydicom fails to read an image, discard it if that's the case.
    if image_id is None and pixels is None and labels_ is None:
        continue
    # Rarely an image is of a different size than the overwhelming majority, discard it if that's the case.
    if pixels.shape != (512, 512):
        continue
    np.save(f"../../data/processed/images/{image_id}.npy", pixels)
    ids.append(image_id)
    labels.append(labels_)

print(f"Processed {len(ids)} images successfully.")
np.save("../../data/processed/ids.npy", np.array(ids))
np.save("../../data/processed/Y.npy", np.array(labels))